# -- Clear environment of variables and functions------------------
# NOTE(review): this wipes every object in the calling environment, so the
# script should only be run stand-alone, never sourced from another session
# whose objects must survive.
rm(list = ls(all.names = TRUE))

# Clear environment of packages
# Detach (and try to unload) every attached non-base package so the library()
# calls below start from a clean search path. `local()` keeps the helper
# variable out of the freshly cleared environment, and `invisible()` stops the
# list of NULLs returned by lapply() from being printed.
local({
  attached_pkgs <- names(sessionInfo()$otherPkgs)
  if (!is.null(attached_pkgs)) {
    invisible(lapply(
      paste0("package:", attached_pkgs),
      detach,
      character.only = TRUE,
      unload = TRUE
    ))
  }
})

# -- Load packages ---------------------------------------
library(tidyverse)
library(gridExtra)
library(janitor)
library(knitr)
library(GGally)
library(MultinomialCI)
library(htmlTable)
library(Hmisc)
library(formattable)
library(rms)
library(MultNonParam)
library(inspectdf)
library(DataExplorer)
library(leaflet)
library(ggmap)
# Register the Google Maps API key with ggmap.
# The key is read from the GGMAP_GOOGLE_API_KEY environment variable (for
# example set in ~/.Renviron) instead of being hard-coded, so the credential
# is never committed with the source.
# NOTE(review): the key that was previously embedded here should be treated
# as leaked and revoked in the Google Cloud console.
google_api_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (nzchar(google_api_key)) {
  ggmap::register_google(key = google_api_key)
} else {
  warning(
    "GGMAP_GOOGLE_API_KEY is not set; Google-backed ggmap calls will fail.",
    call. = FALSE
  )
}
library(htmltools)
library(colorspace)


library(readr)
# Read the raw listings table into `fp`; the path is resolved relative to the
# current working directory.
fp <- read_csv(file = "AB_NYC_2019.csv")

# Convert the identifier and label columns to factors in one pass.
factor_cols <- c(
  "id", "name", "host_id", "host_name",
  "neighbourhood_group", "neighbourhood", "room_type"
)
fp[factor_cols] <- lapply(fp[factor_cols], factor)

# Split off "inactive" listings: rows with a zero price or zero available
# days are kept in `inactivated`, and `fp` keeps only rows where both price
# and availability are strictly positive. which() discards rows whose
# condition is NA, so rows with missing values end up in neither table.
is_inactive <- fp$price == 0 | fp$availability_365 == 0
inactivated <- fp[which(is_inactive), ]
is_active <- fp$price > 0 & fp$availability_365 > 0
fp <- fp[which(is_active), ]
# Optional: also drop very expensive listings.
# fp <- fp[which(fp$price < 1500), ]

# Listing history: total reviews divided by reviews per month. This must be
# computed BEFORE the NA fill below so that listings without a
# reviews_per_month value get NA here instead of a division by zero.
fp$list_history <- with(fp, number_of_reviews / reviews_per_month)

# Missing reviews_per_month is treated as zero reviews per month.
fp$reviews_per_month <- replace(
  fp$reviews_per_month,
  is.na(fp$reviews_per_month),
  0
)

# Minimum spend: nightly price times the minimum number of nights.
fp$min_spend <- with(fp, price * minimum_nights)

# Drop the two identifier columns; they are not used in the analysis below.
delete <- c("id", "host_id")
fp <- fp[, setdiff(names(fp), delete)]

# One table per borough (neighbourhood_group), used by the per-borough plots.
# which() drops rows whose neighbourhood_group is NA, matching subset().
pick_borough <- function(listings, borough) {
  listings[which(listings$neighbourhood_group == borough), ]
}
manhattan <- pick_borough(fp, "Manhattan")
brooklyn <- pick_borough(fp, "Brooklyn")
bronx <- pick_borough(fp, "Bronx")
queens <- pick_borough(fp, "Queens")
staten <- pick_borough(fp, "Staten Island")

Uni-variate non-graphical EDA

# Preview the first six rows of the cleaned listings table.
head(fp, n = 6L)
## # A tibble: 6 x 16
##   name  host_name neighbourhood_g… neighbourhood latitude longitude
##   <fct> <fct>     <fct>            <fct>            <dbl>     <dbl>
## 1 Clea… John      Brooklyn         Kensington        40.6     -74.0
## 2 Skyl… Jennifer  Manhattan        Midtown           40.8     -74.0
## 3 THE … Elisabeth Manhattan        Harlem            40.8     -73.9
## 4 Cozy… LisaRoxa… Brooklyn         Clinton Hill      40.7     -74.0
## 5 Larg… Chris     Manhattan        Murray Hill       40.7     -74.0
## 6 Larg… Shunichi  Manhattan        Hell's Kitch…     40.8     -74.0
## # … with 10 more variables: room_type <fct>, price <dbl>,
## #   minimum_nights <dbl>, number_of_reviews <dbl>, last_review <date>,
## #   reviews_per_month <dbl>, calculated_host_listings_count <dbl>,
## #   availability_365 <dbl>, list_history <dbl>, min_spend <dbl>

Comments

  • Some rows have no last_review value; we assume those listings have no reviews
# Show the number of observations and variables and the type of each column.
str(object = fp)
## Classes 'tbl_df', 'tbl' and 'data.frame':    31354 obs. of  16 variables:
##  $ name                          : Factor w/ 47894 levels "_Special Offers: Guest Assistance",..: 12563 38007 45006 15581 24841 24888 17562 5566 10757 26708 ...
##  $ host_name                     : Factor w/ 11452 levels "​ Valéria","-TheQueensCornerLot",..: 4996 4790 2912 6209 1937 9648 1234 6029 5408 5958 ...
##  $ neighbourhood_group           : Factor w/ 5 levels "Bronx","Brooklyn",..: 2 3 3 2 3 3 3 3 3 2 ...
##  $ neighbourhood                 : Factor w/ 221 levels "Allerton","Arden Heights",..: 109 128 95 42 138 96 36 203 96 183 ...
##  $ latitude                      : num  40.6 40.8 40.8 40.7 40.7 ...
##  $ longitude                     : num  -74 -74 -73.9 -74 -74 ...
##  $ room_type                     : Factor w/ 3 levels "Entire home/apt",..: 2 1 2 1 1 2 1 1 2 2 ...
##  $ price                         : num  149 225 150 89 200 79 150 135 85 89 ...
##  $ minimum_nights                : num  1 1 3 1 3 2 1 5 2 4 ...
##  $ number_of_reviews             : num  9 45 0 270 74 430 160 53 188 167 ...
##  $ last_review                   : Date, format: "2018-10-19" "2019-05-21" ...
##  $ reviews_per_month             : num  0.21 0.38 0 4.64 0.59 3.47 1.33 0.43 1.5 1.34 ...
##  $ calculated_host_listings_count: num  6 2 1 1 1 1 4 1 1 3 ...
##  $ availability_365              : num  365 355 365 194 129 220 188 6 39 314 ...
##  $ list_history                  : num  42.9 118.4 NA 58.2 125.4 ...
##  $ min_spend                     : num  149 225 450 89 600 158 150 675 170 356 ...

Comments

  • negative longitude means West of the Prime Meridian
# Per-column summary: level counts for factors, quantiles and NA counts for
# numeric and date columns.
summary(object = fp)
##                                          name              host_name    
##  Hillside Hotel                            :   18   Sonder (NYC):  327  
##  Home away from home                       :   13   Michael     :  242  
##  New york Multi-unit building              :   12   Blueground  :  232  
##  Loft Suite @ The Box House Hotel          :   11   David       :  228  
##  Artsy Private BR in Fort Greene Cumberland:   10   John        :  211  
##  (Other)                                   :31286   (Other)     :30106  
##  NA's                                      :    4   NA's        :    8  
##     neighbourhood_group            neighbourhood      latitude    
##  Bronx        :  913    Bedford-Stuyvesant: 2478   Min.   :40.50  
##  Brooklyn     :12253    Williamsburg      : 2051   1st Qu.:40.69  
##  Manhattan    :13559    Harlem            : 1734   Median :40.72  
##  Queens       : 4298    Bushwick          : 1447   Mean   :40.73  
##  Staten Island:  331    Hell's Kitchen    : 1446   3rd Qu.:40.76  
##                         Upper East Side   : 1095   Max.   :40.91  
##                         (Other)           :21103                  
##    longitude                room_type         price        
##  Min.   :-74.24   Entire home/apt:16532   Min.   :   10.0  
##  1st Qu.:-73.98   Private room   :13960   1st Qu.:   70.0  
##  Median :-73.95   Shared room    :  862   Median :  112.0  
##  Mean   :-73.95                           Mean   :  162.1  
##  3rd Qu.:-73.93                           3rd Qu.:  189.0  
##  Max.   :-73.71                           Max.   :10000.0  
##                                                            
##  minimum_nights     number_of_reviews  last_review        
##  Min.   :   1.000   Min.   :  0.00    Min.   :2011-04-25  
##  1st Qu.:   2.000   1st Qu.:  2.00    1st Qu.:2019-04-28  
##  Median :   3.000   Median : 10.00    Median :2019-06-15  
##  Mean   :   8.338   Mean   : 31.85    Mean   :2019-03-19  
##  3rd Qu.:   5.000   3rd Qu.: 39.00    3rd Qu.:2019-06-28  
##  Max.   :1250.000   Max.   :629.00    Max.   :2019-07-08  
##                                       NA's   :5207        
##  reviews_per_month calculated_host_listings_count availability_365
##  Min.   : 0.000    Min.   :  1.00                 Min.   :  1.0   
##  1st Qu.: 0.170    1st Qu.:  1.00                 1st Qu.: 55.0   
##  Median : 0.920    Median :  1.00                 Median :168.0   
##  Mean   : 1.509    Mean   : 10.34                 Mean   :175.8   
##  3rd Qu.: 2.340    3rd Qu.:  3.00                 3rd Qu.:305.0   
##  Max.   :58.500    Max.   :327.00                 Max.   :365.0   
##                                                                   
##   list_history       min_spend      
##  Min.   :  1.000   Min.   :     10  
##  1st Qu.:  6.828   1st Qu.:    140  
##  Median : 18.859   Median :    330  
##  Mean   : 25.783   Mean   :   1591  
##  3rd Qu.: 38.636   3rd Qu.:    900  
##  Max.   :125.424   Max.   :1170000  
##  NA's   :5207

Comments

  • categorical variables: neighbourhood_group, neighbourhood, room_type
  • numerical variables: latitude, longitude, price, minimum_nights, number_of_reviews, reviews_per_month, calculated_host_listings_count, availability_365, list_history
  • last_review: it has many NA values, it may be removed
  • Most popular neighbourhoods: Brooklyn (Williamsburg, Bedford-Stuyvesant, Bushwick), Manhattan (Harlem, Upper East Side, Hell’s Kitchen)

Uni-variate graphical EDA

Categorical

All New York City

# Bar charts of the three categorical variables for all listings, stacked in
# a single column: neighbourhood_group, neighbourhood and room_type.
nyc_group_bars <- ggplot(fp, aes(x = neighbourhood_group)) +
  geom_bar()
nyc_neighbourhood_bars <- ggplot(fp, aes(x = neighbourhood)) +
  geom_bar()
nyc_room_type_bars <- ggplot(fp, aes(x = room_type)) +
  geom_bar()
grid.arrange(
  nyc_group_bars,
  nyc_neighbourhood_bars,
  nyc_room_type_bars,
  ncol = 1
)

Manhattan

# Manhattan: listings per neighbourhood (horizontal bars) next to listings
# per room type.
manhattan_neighbourhood_bars <- ggplot(manhattan, aes(x = neighbourhood)) +
  geom_bar() +
  coord_flip()
manhattan_room_type_bars <- ggplot(manhattan, aes(x = room_type)) +
  geom_bar()
grid.arrange(manhattan_neighbourhood_bars, manhattan_room_type_bars, ncol = 2)

Brooklyn

# Brooklyn: listings per neighbourhood (horizontal bars) next to listings
# per room type.
brooklyn_neighbourhood_bars <- ggplot(brooklyn, aes(x = neighbourhood)) +
  geom_bar() +
  coord_flip()
brooklyn_room_type_bars <- ggplot(brooklyn, aes(x = room_type)) +
  geom_bar()
grid.arrange(brooklyn_neighbourhood_bars, brooklyn_room_type_bars, ncol = 2)

Bronx

# Bronx: listings per neighbourhood (horizontal bars) next to listings per
# room type.
bronx_neighbourhood_bars <- ggplot(bronx, aes(x = neighbourhood)) +
  geom_bar() +
  coord_flip()
bronx_room_type_bars <- ggplot(bronx, aes(x = room_type)) +
  geom_bar()
grid.arrange(bronx_neighbourhood_bars, bronx_room_type_bars, ncol = 2)

Staten island

# Staten Island: listings per neighbourhood (horizontal bars) next to
# listings per room type.
staten_neighbourhood_bars <- ggplot(staten, aes(x = neighbourhood)) +
  geom_bar() +
  coord_flip()
staten_room_type_bars <- ggplot(staten, aes(x = room_type)) +
  geom_bar()
grid.arrange(staten_neighbourhood_bars, staten_room_type_bars, ncol = 2)

Frequency of categorical variables

# Plot the level frequencies of every categorical column in `fp`.
categorical_summary <- inspect_cat(fp)
show_plot(categorical_summary)

Comments

  • Surprisingly, Manhattan has more Entire home/apt listings than any other part of NYC.
  • Brooklyn has almost the same number of Entire home/apt and Private room listings.
  • Bronx and Queens have more Private room listings than Entire home/apt listings.
  • There are a few especially popular neighbourhoods.

Quantitative

price

All NYC

# Distribution of price: histogram on top, boxplot underneath. The boxplot is
# flipped so that price runs along the horizontal axis, as in the histogram.
price_hist <- ggplot(fp, aes(x = price)) +
  geom_histogram()
price_box <- ggplot(fp, aes(x = 1, y = price)) +
  geom_boxplot() +
  coord_flip()
grid.arrange(price_hist, price_box, ncol = 1)

Comments

  • Skewed

minimum_nights

# Distribution of minimum_nights: histogram on top, boxplot underneath. The
# boxplot is flipped so that the variable runs along the horizontal axis, as
# in the histogram.
min_nights_hist <- ggplot(fp, aes(x = minimum_nights)) +
  geom_histogram()
min_nights_box <- ggplot(fp, aes(x = 1, y = minimum_nights)) +
  geom_boxplot() +
  coord_flip()
grid.arrange(min_nights_hist, min_nights_box, ncol = 1)

Comments

  • Skewed

number_of_reviews

# Distribution of number_of_reviews: histogram on top, boxplot underneath.
# The boxplot is flipped so that the variable runs along the horizontal axis,
# as in the histogram.
n_reviews_hist <- ggplot(fp, aes(x = number_of_reviews)) +
  geom_histogram()
n_reviews_box <- ggplot(fp, aes(x = 1, y = number_of_reviews)) +
  geom_boxplot() +
  coord_flip()
grid.arrange(n_reviews_hist, n_reviews_box, ncol = 1)

Comments

  • Skewed

reviews_per_month

# Distribution of reviews_per_month: histogram on top, boxplot underneath.
# The boxplot is flipped so that the variable runs along the horizontal axis,
# as in the histogram.
monthly_reviews_hist <- ggplot(fp, aes(x = reviews_per_month)) +
  geom_histogram()
monthly_reviews_box <- ggplot(fp, aes(x = 1, y = reviews_per_month)) +
  geom_boxplot() +
  coord_flip()
grid.arrange(monthly_reviews_hist, monthly_reviews_box, ncol = 1)

Comments

  • Skewed

calculated_host_listings_count

# Distribution of calculated_host_listings_count: histogram on top, boxplot
# underneath. The boxplot is flipped so that the variable runs along the
# horizontal axis, as in the histogram.
host_listings_hist <- ggplot(fp, aes(x = calculated_host_listings_count)) +
  geom_histogram()
host_listings_box <- ggplot(
  fp,
  aes(x = 1, y = calculated_host_listings_count)
) +
  geom_boxplot() +
  coord_flip()
grid.arrange(host_listings_hist, host_listings_box, ncol = 1)

Comments

  • Skewed

availability_365

# Distribution of availability_365: histogram on top, boxplot underneath. The
# boxplot is flipped so that the variable runs along the horizontal axis, as
# in the histogram.
availability_hist <- ggplot(fp, aes(x = availability_365)) +
  geom_histogram()
availability_box <- ggplot(fp, aes(x = 1, y = availability_365)) +
  geom_boxplot() +
  coord_flip()
grid.arrange(availability_hist, availability_box, ncol = 1)

Comments

  • Skewed

list_history

# Distribution of list_history: histogram on top, boxplot underneath. The
# boxplot is flipped so that the variable runs along the horizontal axis, as
# in the histogram. Rows where list_history is NA are dropped by ggplot2 with
# a warning.
list_history_hist <- ggplot(fp, aes(x = list_history)) +
  geom_histogram()
list_history_box <- ggplot(fp, aes(x = 1, y = list_history)) +
  geom_boxplot() +
  coord_flip()
grid.arrange(list_history_hist, list_history_box, ncol = 1)

Comments

  • Skewed
  • Many Airbnb listings were added recently (less than 3 years of listing history)

Show all numerical variables

# Plot the distribution of every numeric column in `fp`.
numeric_summary <- inspect_num(fp)
show_plot(numeric_summary)

Questions

  • It is hard to see the detail of price because there are some huge outliers

Multi-variate non-graphical

Categorical

neighbourhood_group & room_type

# Counts of listings by neighbourhood_group (rows) and room_type (columns),
# with a total row and a total column appended.
group_room_counts <- tabyl(fp, neighbourhood_group, room_type)
adorn_totals(group_room_counts, where = c("row", "col"))
##  neighbourhood_group Entire home/apt Private room Shared room Total
##                Bronx             318          545          50   913
##             Brooklyn            6071         5876         306 12253
##            Manhattan            8441         4782         336 13559
##               Queens            1550         2586         162  4298
##        Staten Island             152          171           8   331
##                Total           16532        13960         862 31354
# Same cross-tab as proportions of all listings, rounded to two decimals,
# with a total row and a total column.
group_room_shares <- tabyl(fp, neighbourhood_group, room_type)
group_room_shares <- adorn_totals(group_room_shares, where = c("row", "col"))
group_room_shares <- adorn_percentages(group_room_shares, denominator = "all")
adorn_rounding(group_room_shares, digits = 2)
##  neighbourhood_group Entire home/apt Private room Shared room Total
##                Bronx            0.01         0.02        0.00  0.03
##             Brooklyn            0.19         0.19        0.01  0.39
##            Manhattan            0.27         0.15        0.01  0.43
##               Queens            0.05         0.08        0.01  0.14
##        Staten Island            0.00         0.01        0.00  0.01
##                Total            0.53         0.45        0.03  1.00

Comments

neighbourhood & room_type (all NYC)

# Counts of listings by neighbourhood (rows) and room_type (columns) over the
# whole `fp` table, with a total row and a total column appended.
neighbourhood_room_counts <- tabyl(fp, neighbourhood, room_type)
adorn_totals(neighbourhood_room_counts, where = c("row", "col"))
##               neighbourhood Entire home/apt Private room Shared room Total
##                    Allerton              15           22           0    37
##               Arden Heights               3            0           0     3
##                    Arrochar              10           11           0    21
##                     Arverne              46           25           2    73
##                     Astoria             213          317          18   548
##                  Bath Beach               5           10           0    15
##           Battery Park City              23           10           1    34
##                   Bay Ridge              54           51           1   106
##                 Bay Terrace               3            2           1     6
##  Bay Terrace, Staten Island               0            0           0     0
##                  Baychester               3            3           0     6
##                     Bayside              12           24           0    36
##                   Bayswater               5           12           0    17
##          Bedford-Stuyvesant            1161         1248          69  2478
##                Belle Harbor               5            3           0     8
##                   Bellerose               2            9           1    12
##                     Belmont               3           13           1    17
##                 Bensonhurst              23           31           1    55
##                Bergen Beach               8            2           0    10
##                 Boerum Hill              81           18           0    99
##                Borough Park              22           74          16   112
##                Breezy Point               0            3           0     3
##                   Briarwood              18           27           3    48
##              Brighton Beach              26           32           6    64
##                   Bronxdale               4            8           1    13
##            Brooklyn Heights              69           14           0    83
##                 Brownsville              17           31           0    48
##                 Bull's Head               0            4           0     4
##                    Bushwick             445          952          50  1447
##             Cambria Heights               4           16           0    20
##                    Canarsie              73           66           1   140
##             Carroll Gardens              96           26           0   122
##                 Castle Hill               4            5           0     9
##           Castleton Corners               2            1           0     3
##                     Chelsea             520          176          10   706
##                   Chinatown             134           90           2   226
##                 City Island              10            7           0    17
##                Civic Center              18            9           0    27
##           Claremont Village               8           20           0    28
##                Clason Point               7           11           2    20
##                     Clifton               7            3           4    14
##                Clinton Hill             200          117           5   322
##                  Co-op City               0            2           0     2
##                 Cobble Hill              48           10           0    58
##               College Point               9            6           0    15
##                 Columbia St              21            5           0    26
##                     Concord               4           19           3    26
##                   Concourse              14           25           0    39
##           Concourse Village               6           18           0    24
##                Coney Island               9            3           2    14
##                      Corona              12           26          24    62
##               Crown Heights             483          413          16   912
##               Cypress Hills              49           62           6   117
##            Ditmars Steinway              80          130           0   210
##                Dongan Hills               3            3           0     6
##                  Douglaston               3            5           0     8
##           Downtown Brooklyn              25           13           0    38
##                       DUMBO              12            8           0    20
##               Dyker Heights               5            4           1    10
##               East Elmhurst              45          113           5   163
##               East Flatbush             157          254          22   433
##                 East Harlem             318          384          34   736
##             East Morrisania               3            6           0     9
##               East New York              89          117           5   211
##                East Village             630          319           8   957
##                 Eastchester               6            6           1    13
##                    Edenwald               6            7           0    13
##                    Edgemere               4            6           0    10
##                    Elmhurst              44          128           7   179
##                 Eltingville               2            1           0     3
##                Emerson Hill               2            2           0     4
##                Far Rockaway               9           16           0    25
##                   Fieldston               3            3           1     7
##          Financial District             479           88           5   572
##                    Flatbush             154          196          17   367
##           Flatiron District              41            8           0    49
##                   Flatlands              31           42           1    74
##                    Flushing              92          269           8   369
##                     Fordham              12           40           4    56
##                Forest Hills              39           54           6    99
##                 Fort Greene             172          109           6   287
##               Fort Hamilton              22           18           2    42
##              Fort Wadsworth               1            0           0     1
##               Fresh Meadows              13           14           0    27
##                    Glendale              21           21           0    42
##                     Gowanus              82           69           1   152
##                    Gramercy             149           45           2   196
##                Graniteville               1            1           0     2
##                  Grant City               4            2           0     6
##                   Gravesend              23           15           8    46
##                 Great Kills               5            5           0    10
##                  Greenpoint             346          225          14   585
##           Greenwich Village             163           43           1   207
##                 Grymes Hill               5            1           0     6
##                      Harlem             674         1021          39  1734
##              Hell's Kitchen             886          472          88  1446
##                  Highbridge               5           19           0    24
##                      Hollis               3            9           1    13
##                  Holliswood               0            3           0     3
##                Howard Beach              10            6           0    16
##                Howland Hook               2            0           0     2
##                    Huguenot               3            0           0     3
##                 Hunts Point               0           16           0    16
##                      Inwood              55           99           2   156
##             Jackson Heights              46           93          11   150
##                     Jamaica              74          131           6   211
##             Jamaica Estates              15            3           0    18
##               Jamaica Hills               3            3           0     6
##                  Kensington              38           61           3   102
##                 Kew Gardens               4           19           3    26
##           Kew Gardens Hills              12            7           0    19
##                 Kingsbridge              15           41           2    58
##                    Kips Bay             213           52          13   278
##                   Laurelton               9            7           0    16
##             Lighthouse Hill               2            0           0     2
##                Little Italy              53           25           2    80
##                 Little Neck               1            3           1     5
##            Long Island City             146          201          10   357
##                    Longwood              14           29           1    44
##             Lower East Side             313          186          31   530
##             Manhattan Beach               4            2           0     6
##                 Marble Hill               2            4           0     6
##             Mariners Harbor               3            5           0     8
##                     Maspeth              41           54           3    98
##                     Melrose               5            2           0     7
##              Middle Village              20            9           0    29
##               Midland Beach               4            1           0     5
##                     Midtown             802          272          13  1087
##                     Midwood              30           34           5    69
##                  Mill Basin               4            0           0     4
##         Morningside Heights              43           84           1   128
##              Morris Heights               2           12           0    14
##                 Morris Park               3            9           1    13
##                  Morrisania               3            8           3    14
##                  Mott Haven              23           29           0    52
##                  Mount Eden               0            3           0     3
##                  Mount Hope               9            7           0    16
##                 Murray Hill             313           46           7   366
##                   Navy Yard               3            3           0     6
##                    Neponsit               3            0           0     3
##                New Brighton               1            4           0     5
##                    New Dorp               0            0           0     0
##              New Dorp Beach               1            3           0     4
##             New Springville               2            4           0     6
##                        NoHo              49            4           0    53
##                      Nolita              88           41           2   131
##             North Riverdale               3            5           0     8
##                     Norwood               3           18           1    22
##                     Oakwood               2            2           0     4
##                   Olinville               0            1           2     3
##                  Ozone Park              30           20           1    51
##                  Park Slope             216           73           0   289
##                 Parkchester               7           19           5    31
##                  Pelham Bay              11            3           2    16
##              Pelham Gardens              17            8           1    26
##                 Port Morris              14           12          15    41
##               Port Richmond               0            5           0     5
##                Prince's Bay               2            1           0     3
##            Prospect Heights             116           73           0   189
##   Prospect-Lefferts Gardens             160          156           9   325
##              Queens Village              32           18           1    51
##               Randall Manor               9            9           1    19
##                    Red Hook              34           18           1    53
##                   Rego Park              35           46           4    85
##               Richmond Hill              32           48           3    83
##                Richmondtown               1            0           0     1
##                   Ridgewood              85          182          10   277
##                   Riverdale               4            3           1     8
##              Rockaway Beach              37           12           0    49
##            Roosevelt Island               9           25           1    35
##                    Rosebank               4            2           0     6
##                    Rosedale              16           38           0    54
##                   Rossville               1            0           0     1
##               Schuylerville               2            9           1    12
##                    Sea Gate               3            0           0     3
##              Sheepshead Bay              45           70          17   132
##                 Shore Acres               3            2           0     5
##                 Silver Lake               1            0           0     1
##                        SoHo             166           77           1   244
##                   Soundview               3            9           0    12
##                 South Beach               1            6           0     7
##            South Ozone Park              13           17           8    38
##                 South Slope             112           56           0   168
##         Springfield Gardens              31           47           1    79
##              Spuyten Duyvil               3            1           0     4
##                  St. Albans              27           45           0    72
##                  St. George              25           17           0    42
##                   Stapleton               9           16           0    25
##             Stuyvesant Town               7           10           1    18
##                   Sunnyside              76          148          21   245
##                 Sunset Park             118          128           3   249
##            Theater District             134          103           2   239
##                 Throgs Neck              11           12           0    23
##                   Todt Hill               2            1           0     3
##               Tompkinsville              13           24           0    37
##                 Tottenville               5            2           0     7
##                     Tremont               2            3           2     7
##                     Tribeca             108           13           0   121
##                 Two Bridges              14           33           0    47
##                   Unionport               4            3           0     7
##          University Heights               4           11           0    15
##             Upper East Side             795          270          30  1095
##             Upper West Side             712          354          23  1089
##                    Van Nest               5            3           3    11
##                Vinegar Hill              13            7           1    21
##                   Wakefield              16           26           0    42
##          Washington Heights             175          357          16   548
##               West Brighton               5           13           0    18
##                  West Farms               1            1           0     2
##                West Village             355           62           1   418
##          Westchester Square               4            5           0     9
##                 Westerleigh               1            1           0     2
##                  Whitestone               3            7           0    10
##              Williamsbridge              17           17           0    34
##                Williamsburg            1098          936          17  2051
##                 Willowbrook               1            0           0     1
##             Windsor Terrace              69           24           0    93
##                   Woodhaven              16           61           2    79
##                    Woodlawn               4            5           0     9
##                     Woodrow               0            0           0     0
##                    Woodside              51          123           1   175
##                       Total           16532        13960         862 31354
# Neighbourhood by room_type cross-tab as proportions of all listings,
# rounded to two decimals, with a total row and a total column.
neighbourhood_room_shares <- tabyl(fp, neighbourhood, room_type)
neighbourhood_room_shares <- adorn_totals(
  neighbourhood_room_shares,
  where = c("row", "col")
)
neighbourhood_room_shares <- adorn_percentages(
  neighbourhood_room_shares,
  denominator = "all"
)
adorn_rounding(neighbourhood_room_shares, digits = 2)
##               neighbourhood Entire home/apt Private room Shared room Total
##                    Allerton            0.00         0.00        0.00  0.00
##               Arden Heights            0.00         0.00        0.00  0.00
##                    Arrochar            0.00         0.00        0.00  0.00
##                     Arverne            0.00         0.00        0.00  0.00
##                     Astoria            0.01         0.01        0.00  0.02
##                  Bath Beach            0.00         0.00        0.00  0.00
##           Battery Park City            0.00         0.00        0.00  0.00
##                   Bay Ridge            0.00         0.00        0.00  0.00
##                 Bay Terrace            0.00         0.00        0.00  0.00
##  Bay Terrace, Staten Island            0.00         0.00        0.00  0.00
##                  Baychester            0.00         0.00        0.00  0.00
##                     Bayside            0.00         0.00        0.00  0.00
##                   Bayswater            0.00         0.00        0.00  0.00
##          Bedford-Stuyvesant            0.04         0.04        0.00  0.08
##                Belle Harbor            0.00         0.00        0.00  0.00
##                   Bellerose            0.00         0.00        0.00  0.00
##                     Belmont            0.00         0.00        0.00  0.00
##                 Bensonhurst            0.00         0.00        0.00  0.00
##                Bergen Beach            0.00         0.00        0.00  0.00
##                 Boerum Hill            0.00         0.00        0.00  0.00
##                Borough Park            0.00         0.00        0.00  0.00
##                Breezy Point            0.00         0.00        0.00  0.00
##                   Briarwood            0.00         0.00        0.00  0.00
##              Brighton Beach            0.00         0.00        0.00  0.00
##                   Bronxdale            0.00         0.00        0.00  0.00
##            Brooklyn Heights            0.00         0.00        0.00  0.00
##                 Brownsville            0.00         0.00        0.00  0.00
##                 Bull's Head            0.00         0.00        0.00  0.00
##                    Bushwick            0.01         0.03        0.00  0.05
##             Cambria Heights            0.00         0.00        0.00  0.00
##                    Canarsie            0.00         0.00        0.00  0.00
##             Carroll Gardens            0.00         0.00        0.00  0.00
##                 Castle Hill            0.00         0.00        0.00  0.00
##           Castleton Corners            0.00         0.00        0.00  0.00
##                     Chelsea            0.02         0.01        0.00  0.02
##                   Chinatown            0.00         0.00        0.00  0.01
##                 City Island            0.00         0.00        0.00  0.00
##                Civic Center            0.00         0.00        0.00  0.00
##           Claremont Village            0.00         0.00        0.00  0.00
##                Clason Point            0.00         0.00        0.00  0.00
##                     Clifton            0.00         0.00        0.00  0.00
##                Clinton Hill            0.01         0.00        0.00  0.01
##                  Co-op City            0.00         0.00        0.00  0.00
##                 Cobble Hill            0.00         0.00        0.00  0.00
##               College Point            0.00         0.00        0.00  0.00
##                 Columbia St            0.00         0.00        0.00  0.00
##                     Concord            0.00         0.00        0.00  0.00
##                   Concourse            0.00         0.00        0.00  0.00
##           Concourse Village            0.00         0.00        0.00  0.00
##                Coney Island            0.00         0.00        0.00  0.00
##                      Corona            0.00         0.00        0.00  0.00
##               Crown Heights            0.02         0.01        0.00  0.03
##               Cypress Hills            0.00         0.00        0.00  0.00
##            Ditmars Steinway            0.00         0.00        0.00  0.01
##                Dongan Hills            0.00         0.00        0.00  0.00
##                  Douglaston            0.00         0.00        0.00  0.00
##           Downtown Brooklyn            0.00         0.00        0.00  0.00
##                       DUMBO            0.00         0.00        0.00  0.00
##               Dyker Heights            0.00         0.00        0.00  0.00
##               East Elmhurst            0.00         0.00        0.00  0.01
##               East Flatbush            0.01         0.01        0.00  0.01
##                 East Harlem            0.01         0.01        0.00  0.02
##             East Morrisania            0.00         0.00        0.00  0.00
##               East New York            0.00         0.00        0.00  0.01
##                East Village            0.02         0.01        0.00  0.03
##                 Eastchester            0.00         0.00        0.00  0.00
##                    Edenwald            0.00         0.00        0.00  0.00
##                    Edgemere            0.00         0.00        0.00  0.00
##                    Elmhurst            0.00         0.00        0.00  0.01
##                 Eltingville            0.00         0.00        0.00  0.00
##                Emerson Hill            0.00         0.00        0.00  0.00
##                Far Rockaway            0.00         0.00        0.00  0.00
##                   Fieldston            0.00         0.00        0.00  0.00
##          Financial District            0.02         0.00        0.00  0.02
##                    Flatbush            0.00         0.01        0.00  0.01
##           Flatiron District            0.00         0.00        0.00  0.00
##                   Flatlands            0.00         0.00        0.00  0.00
##                    Flushing            0.00         0.01        0.00  0.01
##                     Fordham            0.00         0.00        0.00  0.00
##                Forest Hills            0.00         0.00        0.00  0.00
##                 Fort Greene            0.01         0.00        0.00  0.01
##               Fort Hamilton            0.00         0.00        0.00  0.00
##              Fort Wadsworth            0.00         0.00        0.00  0.00
##               Fresh Meadows            0.00         0.00        0.00  0.00
##                    Glendale            0.00         0.00        0.00  0.00
##                     Gowanus            0.00         0.00        0.00  0.00
##                    Gramercy            0.00         0.00        0.00  0.01
##                Graniteville            0.00         0.00        0.00  0.00
##                  Grant City            0.00         0.00        0.00  0.00
##                   Gravesend            0.00         0.00        0.00  0.00
##                 Great Kills            0.00         0.00        0.00  0.00
##                  Greenpoint            0.01         0.01        0.00  0.02
##           Greenwich Village            0.01         0.00        0.00  0.01
##                 Grymes Hill            0.00         0.00        0.00  0.00
##                      Harlem            0.02         0.03        0.00  0.06
##              Hell's Kitchen            0.03         0.02        0.00  0.05
##                  Highbridge            0.00         0.00        0.00  0.00
##                      Hollis            0.00         0.00        0.00  0.00
##                  Holliswood            0.00         0.00        0.00  0.00
##                Howard Beach            0.00         0.00        0.00  0.00
##                Howland Hook            0.00         0.00        0.00  0.00
##                    Huguenot            0.00         0.00        0.00  0.00
##                 Hunts Point            0.00         0.00        0.00  0.00
##                      Inwood            0.00         0.00        0.00  0.00
##             Jackson Heights            0.00         0.00        0.00  0.00
##                     Jamaica            0.00         0.00        0.00  0.01
##             Jamaica Estates            0.00         0.00        0.00  0.00
##               Jamaica Hills            0.00         0.00        0.00  0.00
##                  Kensington            0.00         0.00        0.00  0.00
##                 Kew Gardens            0.00         0.00        0.00  0.00
##           Kew Gardens Hills            0.00         0.00        0.00  0.00
##                 Kingsbridge            0.00         0.00        0.00  0.00
##                    Kips Bay            0.01         0.00        0.00  0.01
##                   Laurelton            0.00         0.00        0.00  0.00
##             Lighthouse Hill            0.00         0.00        0.00  0.00
##                Little Italy            0.00         0.00        0.00  0.00
##                 Little Neck            0.00         0.00        0.00  0.00
##            Long Island City            0.00         0.01        0.00  0.01
##                    Longwood            0.00         0.00        0.00  0.00
##             Lower East Side            0.01         0.01        0.00  0.02
##             Manhattan Beach            0.00         0.00        0.00  0.00
##                 Marble Hill            0.00         0.00        0.00  0.00
##             Mariners Harbor            0.00         0.00        0.00  0.00
##                     Maspeth            0.00         0.00        0.00  0.00
##                     Melrose            0.00         0.00        0.00  0.00
##              Middle Village            0.00         0.00        0.00  0.00
##               Midland Beach            0.00         0.00        0.00  0.00
##                     Midtown            0.03         0.01        0.00  0.03
##                     Midwood            0.00         0.00        0.00  0.00
##                  Mill Basin            0.00         0.00        0.00  0.00
##         Morningside Heights            0.00         0.00        0.00  0.00
##              Morris Heights            0.00         0.00        0.00  0.00
##                 Morris Park            0.00         0.00        0.00  0.00
##                  Morrisania            0.00         0.00        0.00  0.00
##                  Mott Haven            0.00         0.00        0.00  0.00
##                  Mount Eden            0.00         0.00        0.00  0.00
##                  Mount Hope            0.00         0.00        0.00  0.00
##                 Murray Hill            0.01         0.00        0.00  0.01
##                   Navy Yard            0.00         0.00        0.00  0.00
##                    Neponsit            0.00         0.00        0.00  0.00
##                New Brighton            0.00         0.00        0.00  0.00
##                    New Dorp            0.00         0.00        0.00  0.00
##              New Dorp Beach            0.00         0.00        0.00  0.00
##             New Springville            0.00         0.00        0.00  0.00
##                        NoHo            0.00         0.00        0.00  0.00
##                      Nolita            0.00         0.00        0.00  0.00
##             North Riverdale            0.00         0.00        0.00  0.00
##                     Norwood            0.00         0.00        0.00  0.00
##                     Oakwood            0.00         0.00        0.00  0.00
##                   Olinville            0.00         0.00        0.00  0.00
##                  Ozone Park            0.00         0.00        0.00  0.00
##                  Park Slope            0.01         0.00        0.00  0.01
##                 Parkchester            0.00         0.00        0.00  0.00
##                  Pelham Bay            0.00         0.00        0.00  0.00
##              Pelham Gardens            0.00         0.00        0.00  0.00
##                 Port Morris            0.00         0.00        0.00  0.00
##               Port Richmond            0.00         0.00        0.00  0.00
##                Prince's Bay            0.00         0.00        0.00  0.00
##            Prospect Heights            0.00         0.00        0.00  0.01
##   Prospect-Lefferts Gardens            0.01         0.00        0.00  0.01
##              Queens Village            0.00         0.00        0.00  0.00
##               Randall Manor            0.00         0.00        0.00  0.00
##                    Red Hook            0.00         0.00        0.00  0.00
##                   Rego Park            0.00         0.00        0.00  0.00
##               Richmond Hill            0.00         0.00        0.00  0.00
##                Richmondtown            0.00         0.00        0.00  0.00
##                   Ridgewood            0.00         0.01        0.00  0.01
##                   Riverdale            0.00         0.00        0.00  0.00
##              Rockaway Beach            0.00         0.00        0.00  0.00
##            Roosevelt Island            0.00         0.00        0.00  0.00
##                    Rosebank            0.00         0.00        0.00  0.00
##                    Rosedale            0.00         0.00        0.00  0.00
##                   Rossville            0.00         0.00        0.00  0.00
##               Schuylerville            0.00         0.00        0.00  0.00
##                    Sea Gate            0.00         0.00        0.00  0.00
##              Sheepshead Bay            0.00         0.00        0.00  0.00
##                 Shore Acres            0.00         0.00        0.00  0.00
##                 Silver Lake            0.00         0.00        0.00  0.00
##                        SoHo            0.01         0.00        0.00  0.01
##                   Soundview            0.00         0.00        0.00  0.00
##                 South Beach            0.00         0.00        0.00  0.00
##            South Ozone Park            0.00         0.00        0.00  0.00
##                 South Slope            0.00         0.00        0.00  0.01
##         Springfield Gardens            0.00         0.00        0.00  0.00
##              Spuyten Duyvil            0.00         0.00        0.00  0.00
##                  St. Albans            0.00         0.00        0.00  0.00
##                  St. George            0.00         0.00        0.00  0.00
##                   Stapleton            0.00         0.00        0.00  0.00
##             Stuyvesant Town            0.00         0.00        0.00  0.00
##                   Sunnyside            0.00         0.00        0.00  0.01
##                 Sunset Park            0.00         0.00        0.00  0.01
##            Theater District            0.00         0.00        0.00  0.01
##                 Throgs Neck            0.00         0.00        0.00  0.00
##                   Todt Hill            0.00         0.00        0.00  0.00
##               Tompkinsville            0.00         0.00        0.00  0.00
##                 Tottenville            0.00         0.00        0.00  0.00
##                     Tremont            0.00         0.00        0.00  0.00
##                     Tribeca            0.00         0.00        0.00  0.00
##                 Two Bridges            0.00         0.00        0.00  0.00
##                   Unionport            0.00         0.00        0.00  0.00
##          University Heights            0.00         0.00        0.00  0.00
##             Upper East Side            0.03         0.01        0.00  0.03
##             Upper West Side            0.02         0.01        0.00  0.03
##                    Van Nest            0.00         0.00        0.00  0.00
##                Vinegar Hill            0.00         0.00        0.00  0.00
##                   Wakefield            0.00         0.00        0.00  0.00
##          Washington Heights            0.01         0.01        0.00  0.02
##               West Brighton            0.00         0.00        0.00  0.00
##                  West Farms            0.00         0.00        0.00  0.00
##                West Village            0.01         0.00        0.00  0.01
##          Westchester Square            0.00         0.00        0.00  0.00
##                 Westerleigh            0.00         0.00        0.00  0.00
##                  Whitestone            0.00         0.00        0.00  0.00
##              Williamsbridge            0.00         0.00        0.00  0.00
##                Williamsburg            0.04         0.03        0.00  0.07
##                 Willowbrook            0.00         0.00        0.00  0.00
##             Windsor Terrace            0.00         0.00        0.00  0.00
##                   Woodhaven            0.00         0.00        0.00  0.00
##                    Woodlawn            0.00         0.00        0.00  0.00
##                     Woodrow            0.00         0.00        0.00  0.00
##                    Woodside            0.00         0.00        0.00  0.01
##                       Total            0.53         0.45        0.03  1.00

Comments

  • Some neighbourhoods have fewer than 30 observations.
  • Because most neighbourhoods account for less than 1% of all listings, it is probably better to use neighbourhood_group instead, or to restrict the analysis to the most popular neighbourhoods.

neighbourhood & neighbourhood_group

# Listing counts for every neighbourhood, split by neighbourhood_group
fp %>%
  tabyl(var1 = neighbourhood, var2 = neighbourhood_group) %>%
  # add "Total" margins on both the rows and the columns
  adorn_totals(where = c("row", "col"))
##               neighbourhood Bronx Brooklyn Manhattan Queens Staten Island
##                    Allerton    37        0         0      0             0
##               Arden Heights     0        0         0      0             3
##                    Arrochar     0        0         0      0            21
##                     Arverne     0        0         0     73             0
##                     Astoria     0        0         0    548             0
##                  Bath Beach     0       15         0      0             0
##           Battery Park City     0        0        34      0             0
##                   Bay Ridge     0      106         0      0             0
##                 Bay Terrace     0        0         0      6             0
##  Bay Terrace, Staten Island     0        0         0      0             0
##                  Baychester     6        0         0      0             0
##                     Bayside     0        0         0     36             0
##                   Bayswater     0        0         0     17             0
##          Bedford-Stuyvesant     0     2478         0      0             0
##                Belle Harbor     0        0         0      8             0
##                   Bellerose     0        0         0     12             0
##                     Belmont    17        0         0      0             0
##                 Bensonhurst     0       55         0      0             0
##                Bergen Beach     0       10         0      0             0
##                 Boerum Hill     0       99         0      0             0
##                Borough Park     0      112         0      0             0
##                Breezy Point     0        0         0      3             0
##                   Briarwood     0        0         0     48             0
##              Brighton Beach     0       64         0      0             0
##                   Bronxdale    13        0         0      0             0
##            Brooklyn Heights     0       83         0      0             0
##                 Brownsville     0       48         0      0             0
##                 Bull's Head     0        0         0      0             4
##                    Bushwick     0     1447         0      0             0
##             Cambria Heights     0        0         0     20             0
##                    Canarsie     0      140         0      0             0
##             Carroll Gardens     0      122         0      0             0
##                 Castle Hill     9        0         0      0             0
##           Castleton Corners     0        0         0      0             3
##                     Chelsea     0        0       706      0             0
##                   Chinatown     0        0       226      0             0
##                 City Island    17        0         0      0             0
##                Civic Center     0        0        27      0             0
##           Claremont Village    28        0         0      0             0
##                Clason Point    20        0         0      0             0
##                     Clifton     0        0         0      0            14
##                Clinton Hill     0      322         0      0             0
##                  Co-op City     2        0         0      0             0
##                 Cobble Hill     0       58         0      0             0
##               College Point     0        0         0     15             0
##                 Columbia St     0       26         0      0             0
##                     Concord     0        0         0      0            26
##                   Concourse    39        0         0      0             0
##           Concourse Village    24        0         0      0             0
##                Coney Island     0       14         0      0             0
##                      Corona     0        0         0     62             0
##               Crown Heights     0      912         0      0             0
##               Cypress Hills     0      117         0      0             0
##            Ditmars Steinway     0        0         0    210             0
##                Dongan Hills     0        0         0      0             6
##                  Douglaston     0        0         0      8             0
##           Downtown Brooklyn     0       38         0      0             0
##                       DUMBO     0       20         0      0             0
##               Dyker Heights     0       10         0      0             0
##               East Elmhurst     0        0         0    163             0
##               East Flatbush     0      433         0      0             0
##                 East Harlem     0        0       736      0             0
##             East Morrisania     9        0         0      0             0
##               East New York     0      211         0      0             0
##                East Village     0        0       957      0             0
##                 Eastchester    13        0         0      0             0
##                    Edenwald    13        0         0      0             0
##                    Edgemere     0        0         0     10             0
##                    Elmhurst     0        0         0    179             0
##                 Eltingville     0        0         0      0             3
##                Emerson Hill     0        0         0      0             4
##                Far Rockaway     0        0         0     25             0
##                   Fieldston     7        0         0      0             0
##          Financial District     0        0       572      0             0
##                    Flatbush     0      367         0      0             0
##           Flatiron District     0        0        49      0             0
##                   Flatlands     0       74         0      0             0
##                    Flushing     0        0         0    369             0
##                     Fordham    56        0         0      0             0
##                Forest Hills     0        0         0     99             0
##                 Fort Greene     0      287         0      0             0
##               Fort Hamilton     0       42         0      0             0
##              Fort Wadsworth     0        0         0      0             1
##               Fresh Meadows     0        0         0     27             0
##                    Glendale     0        0         0     42             0
##                     Gowanus     0      152         0      0             0
##                    Gramercy     0        0       196      0             0
##                Graniteville     0        0         0      0             2
##                  Grant City     0        0         0      0             6
##                   Gravesend     0       46         0      0             0
##                 Great Kills     0        0         0      0            10
##                  Greenpoint     0      585         0      0             0
##           Greenwich Village     0        0       207      0             0
##                 Grymes Hill     0        0         0      0             6
##                      Harlem     0        0      1734      0             0
##              Hell's Kitchen     0        0      1446      0             0
##                  Highbridge    24        0         0      0             0
##                      Hollis     0        0         0     13             0
##                  Holliswood     0        0         0      3             0
##                Howard Beach     0        0         0     16             0
##                Howland Hook     0        0         0      0             2
##                    Huguenot     0        0         0      0             3
##                 Hunts Point    16        0         0      0             0
##                      Inwood     0        0       156      0             0
##             Jackson Heights     0        0         0    150             0
##                     Jamaica     0        0         0    211             0
##             Jamaica Estates     0        0         0     18             0
##               Jamaica Hills     0        0         0      6             0
##                  Kensington     0      102         0      0             0
##                 Kew Gardens     0        0         0     26             0
##           Kew Gardens Hills     0        0         0     19             0
##                 Kingsbridge    58        0         0      0             0
##                    Kips Bay     0        0       278      0             0
##                   Laurelton     0        0         0     16             0
##             Lighthouse Hill     0        0         0      0             2
##                Little Italy     0        0        80      0             0
##                 Little Neck     0        0         0      5             0
##            Long Island City     0        0         0    357             0
##                    Longwood    44        0         0      0             0
##             Lower East Side     0        0       530      0             0
##             Manhattan Beach     0        6         0      0             0
##                 Marble Hill     0        0         6      0             0
##             Mariners Harbor     0        0         0      0             8
##                     Maspeth     0        0         0     98             0
##                     Melrose     7        0         0      0             0
##              Middle Village     0        0         0     29             0
##               Midland Beach     0        0         0      0             5
##                     Midtown     0        0      1087      0             0
##                     Midwood     0       69         0      0             0
##                  Mill Basin     0        4         0      0             0
##         Morningside Heights     0        0       128      0             0
##              Morris Heights    14        0         0      0             0
##                 Morris Park    13        0         0      0             0
##                  Morrisania    14        0         0      0             0
##                  Mott Haven    52        0         0      0             0
##                  Mount Eden     3        0         0      0             0
##                  Mount Hope    16        0         0      0             0
##                 Murray Hill     0        0       366      0             0
##                   Navy Yard     0        6         0      0             0
##                    Neponsit     0        0         0      3             0
##                New Brighton     0        0         0      0             5
##                    New Dorp     0        0         0      0             0
##              New Dorp Beach     0        0         0      0             4
##             New Springville     0        0         0      0             6
##                        NoHo     0        0        53      0             0
##                      Nolita     0        0       131      0             0
##             North Riverdale     8        0         0      0             0
##                     Norwood    22        0         0      0             0
##                     Oakwood     0        0         0      0             4
##                   Olinville     3        0         0      0             0
##                  Ozone Park     0        0         0     51             0
##                  Park Slope     0      289         0      0             0
##                 Parkchester    31        0         0      0             0
##                  Pelham Bay    16        0         0      0             0
##              Pelham Gardens    26        0         0      0             0
##                 Port Morris    41        0         0      0             0
##               Port Richmond     0        0         0      0             5
##                Prince's Bay     0        0         0      0             3
##            Prospect Heights     0      189         0      0             0
##   Prospect-Lefferts Gardens     0      325         0      0             0
##              Queens Village     0        0         0     51             0
##               Randall Manor     0        0         0      0            19
##                    Red Hook     0       53         0      0             0
##                   Rego Park     0        0         0     85             0
##               Richmond Hill     0        0         0     83             0
##                Richmondtown     0        0         0      0             1
##                   Ridgewood     0        0         0    277             0
##                   Riverdale     8        0         0      0             0
##              Rockaway Beach     0        0         0     49             0
##            Roosevelt Island     0        0        35      0             0
##                    Rosebank     0        0         0      0             6
##                    Rosedale     0        0         0     54             0
##                   Rossville     0        0         0      0             1
##               Schuylerville    12        0         0      0             0
##                    Sea Gate     0        3         0      0             0
##              Sheepshead Bay     0      132         0      0             0
##                 Shore Acres     0        0         0      0             5
##                 Silver Lake     0        0         0      0             1
##                        SoHo     0        0       244      0             0
##                   Soundview    12        0         0      0             0
##                 South Beach     0        0         0      0             7
##            South Ozone Park     0        0         0     38             0
##                 South Slope     0      168         0      0             0
##         Springfield Gardens     0        0         0     79             0
##              Spuyten Duyvil     4        0         0      0             0
##                  St. Albans     0        0         0     72             0
##                  St. George     0        0         0      0            42
##                   Stapleton     0        0         0      0            25
##             Stuyvesant Town     0        0        18      0             0
##                   Sunnyside     0        0         0    245             0
##                 Sunset Park     0      249         0      0             0
##            Theater District     0        0       239      0             0
##                 Throgs Neck    23        0         0      0             0
##                   Todt Hill     0        0         0      0             3
##               Tompkinsville     0        0         0      0            37
##                 Tottenville     0        0         0      0             7
##                     Tremont     7        0         0      0             0
##                     Tribeca     0        0       121      0             0
##                 Two Bridges     0        0        47      0             0
##                   Unionport     7        0         0      0             0
##          University Heights    15        0         0      0             0
##             Upper East Side     0        0      1095      0             0
##             Upper West Side     0        0      1089      0             0
##                    Van Nest    11        0         0      0             0
##                Vinegar Hill     0       21         0      0             0
##                   Wakefield    42        0         0      0             0
##          Washington Heights     0        0       548      0             0
##               West Brighton     0        0         0      0            18
##                  West Farms     2        0         0      0             0
##                West Village     0        0       418      0             0
##          Westchester Square     9        0         0      0             0
##                 Westerleigh     0        0         0      0             2
##                  Whitestone     0        0         0     10             0
##              Williamsbridge    34        0         0      0             0
##                Williamsburg     0     2051         0      0             0
##                 Willowbrook     0        0         0      0             1
##             Windsor Terrace     0       93         0      0             0
##                   Woodhaven     0        0         0     79             0
##                    Woodlawn     9        0         0      0             0
##                     Woodrow     0        0         0      0             0
##                    Woodside     0        0         0    175             0
##                       Total   913    12253     13559   4298           331
##  Total
##     37
##      3
##     21
##     73
##    548
##     15
##     34
##    106
##      6
##      0
##      6
##     36
##     17
##   2478
##      8
##     12
##     17
##     55
##     10
##     99
##    112
##      3
##     48
##     64
##     13
##     83
##     48
##      4
##   1447
##     20
##    140
##    122
##      9
##      3
##    706
##    226
##     17
##     27
##     28
##     20
##     14
##    322
##      2
##     58
##     15
##     26
##     26
##     39
##     24
##     14
##     62
##    912
##    117
##    210
##      6
##      8
##     38
##     20
##     10
##    163
##    433
##    736
##      9
##    211
##    957
##     13
##     13
##     10
##    179
##      3
##      4
##     25
##      7
##    572
##    367
##     49
##     74
##    369
##     56
##     99
##    287
##     42
##      1
##     27
##     42
##    152
##    196
##      2
##      6
##     46
##     10
##    585
##    207
##      6
##   1734
##   1446
##     24
##     13
##      3
##     16
##      2
##      3
##     16
##    156
##    150
##    211
##     18
##      6
##    102
##     26
##     19
##     58
##    278
##     16
##      2
##     80
##      5
##    357
##     44
##    530
##      6
##      6
##      8
##     98
##      7
##     29
##      5
##   1087
##     69
##      4
##    128
##     14
##     13
##     14
##     52
##      3
##     16
##    366
##      6
##      3
##      5
##      0
##      4
##      6
##     53
##    131
##      8
##     22
##      4
##      3
##     51
##    289
##     31
##     16
##     26
##     41
##      5
##      3
##    189
##    325
##     51
##     19
##     53
##     85
##     83
##      1
##    277
##      8
##     49
##     35
##      6
##     54
##      1
##     12
##      3
##    132
##      5
##      1
##    244
##     12
##      7
##     38
##    168
##     79
##      4
##     72
##     42
##     25
##     18
##    245
##    249
##    239
##     23
##      3
##     37
##      7
##      7
##    121
##     47
##      7
##     15
##   1095
##   1089
##     11
##     21
##     42
##    548
##     18
##      2
##    418
##      9
##      2
##     10
##     34
##   2051
##      1
##     93
##     79
##      9
##      0
##    175
##  31354
# Cross-tabulate neighbourhood by borough and express every cell (margins
# included) as a proportion of all listings.
tabyl(fp, neighbourhood, neighbourhood_group) %>%
  adorn_totals(where = c("row", "col")) %>%   # add row and column totals
  adorn_percentages(denominator = "all") %>%  # counts -> share of grand total
  adorn_rounding(digits = 2)                  # keep two decimals
##               neighbourhood Bronx Brooklyn Manhattan Queens Staten Island
##                    Allerton  0.00     0.00      0.00   0.00          0.00
##               Arden Heights  0.00     0.00      0.00   0.00          0.00
##                    Arrochar  0.00     0.00      0.00   0.00          0.00
##                     Arverne  0.00     0.00      0.00   0.00          0.00
##                     Astoria  0.00     0.00      0.00   0.02          0.00
##                  Bath Beach  0.00     0.00      0.00   0.00          0.00
##           Battery Park City  0.00     0.00      0.00   0.00          0.00
##                   Bay Ridge  0.00     0.00      0.00   0.00          0.00
##                 Bay Terrace  0.00     0.00      0.00   0.00          0.00
##  Bay Terrace, Staten Island  0.00     0.00      0.00   0.00          0.00
##                  Baychester  0.00     0.00      0.00   0.00          0.00
##                     Bayside  0.00     0.00      0.00   0.00          0.00
##                   Bayswater  0.00     0.00      0.00   0.00          0.00
##          Bedford-Stuyvesant  0.00     0.08      0.00   0.00          0.00
##                Belle Harbor  0.00     0.00      0.00   0.00          0.00
##                   Bellerose  0.00     0.00      0.00   0.00          0.00
##                     Belmont  0.00     0.00      0.00   0.00          0.00
##                 Bensonhurst  0.00     0.00      0.00   0.00          0.00
##                Bergen Beach  0.00     0.00      0.00   0.00          0.00
##                 Boerum Hill  0.00     0.00      0.00   0.00          0.00
##                Borough Park  0.00     0.00      0.00   0.00          0.00
##                Breezy Point  0.00     0.00      0.00   0.00          0.00
##                   Briarwood  0.00     0.00      0.00   0.00          0.00
##              Brighton Beach  0.00     0.00      0.00   0.00          0.00
##                   Bronxdale  0.00     0.00      0.00   0.00          0.00
##            Brooklyn Heights  0.00     0.00      0.00   0.00          0.00
##                 Brownsville  0.00     0.00      0.00   0.00          0.00
##                 Bull's Head  0.00     0.00      0.00   0.00          0.00
##                    Bushwick  0.00     0.05      0.00   0.00          0.00
##             Cambria Heights  0.00     0.00      0.00   0.00          0.00
##                    Canarsie  0.00     0.00      0.00   0.00          0.00
##             Carroll Gardens  0.00     0.00      0.00   0.00          0.00
##                 Castle Hill  0.00     0.00      0.00   0.00          0.00
##           Castleton Corners  0.00     0.00      0.00   0.00          0.00
##                     Chelsea  0.00     0.00      0.02   0.00          0.00
##                   Chinatown  0.00     0.00      0.01   0.00          0.00
##                 City Island  0.00     0.00      0.00   0.00          0.00
##                Civic Center  0.00     0.00      0.00   0.00          0.00
##           Claremont Village  0.00     0.00      0.00   0.00          0.00
##                Clason Point  0.00     0.00      0.00   0.00          0.00
##                     Clifton  0.00     0.00      0.00   0.00          0.00
##                Clinton Hill  0.00     0.01      0.00   0.00          0.00
##                  Co-op City  0.00     0.00      0.00   0.00          0.00
##                 Cobble Hill  0.00     0.00      0.00   0.00          0.00
##               College Point  0.00     0.00      0.00   0.00          0.00
##                 Columbia St  0.00     0.00      0.00   0.00          0.00
##                     Concord  0.00     0.00      0.00   0.00          0.00
##                   Concourse  0.00     0.00      0.00   0.00          0.00
##           Concourse Village  0.00     0.00      0.00   0.00          0.00
##                Coney Island  0.00     0.00      0.00   0.00          0.00
##                      Corona  0.00     0.00      0.00   0.00          0.00
##               Crown Heights  0.00     0.03      0.00   0.00          0.00
##               Cypress Hills  0.00     0.00      0.00   0.00          0.00
##            Ditmars Steinway  0.00     0.00      0.00   0.01          0.00
##                Dongan Hills  0.00     0.00      0.00   0.00          0.00
##                  Douglaston  0.00     0.00      0.00   0.00          0.00
##           Downtown Brooklyn  0.00     0.00      0.00   0.00          0.00
##                       DUMBO  0.00     0.00      0.00   0.00          0.00
##               Dyker Heights  0.00     0.00      0.00   0.00          0.00
##               East Elmhurst  0.00     0.00      0.00   0.01          0.00
##               East Flatbush  0.00     0.01      0.00   0.00          0.00
##                 East Harlem  0.00     0.00      0.02   0.00          0.00
##             East Morrisania  0.00     0.00      0.00   0.00          0.00
##               East New York  0.00     0.01      0.00   0.00          0.00
##                East Village  0.00     0.00      0.03   0.00          0.00
##                 Eastchester  0.00     0.00      0.00   0.00          0.00
##                    Edenwald  0.00     0.00      0.00   0.00          0.00
##                    Edgemere  0.00     0.00      0.00   0.00          0.00
##                    Elmhurst  0.00     0.00      0.00   0.01          0.00
##                 Eltingville  0.00     0.00      0.00   0.00          0.00
##                Emerson Hill  0.00     0.00      0.00   0.00          0.00
##                Far Rockaway  0.00     0.00      0.00   0.00          0.00
##                   Fieldston  0.00     0.00      0.00   0.00          0.00
##          Financial District  0.00     0.00      0.02   0.00          0.00
##                    Flatbush  0.00     0.01      0.00   0.00          0.00
##           Flatiron District  0.00     0.00      0.00   0.00          0.00
##                   Flatlands  0.00     0.00      0.00   0.00          0.00
##                    Flushing  0.00     0.00      0.00   0.01          0.00
##                     Fordham  0.00     0.00      0.00   0.00          0.00
##                Forest Hills  0.00     0.00      0.00   0.00          0.00
##                 Fort Greene  0.00     0.01      0.00   0.00          0.00
##               Fort Hamilton  0.00     0.00      0.00   0.00          0.00
##              Fort Wadsworth  0.00     0.00      0.00   0.00          0.00
##               Fresh Meadows  0.00     0.00      0.00   0.00          0.00
##                    Glendale  0.00     0.00      0.00   0.00          0.00
##                     Gowanus  0.00     0.00      0.00   0.00          0.00
##                    Gramercy  0.00     0.00      0.01   0.00          0.00
##                Graniteville  0.00     0.00      0.00   0.00          0.00
##                  Grant City  0.00     0.00      0.00   0.00          0.00
##                   Gravesend  0.00     0.00      0.00   0.00          0.00
##                 Great Kills  0.00     0.00      0.00   0.00          0.00
##                  Greenpoint  0.00     0.02      0.00   0.00          0.00
##           Greenwich Village  0.00     0.00      0.01   0.00          0.00
##                 Grymes Hill  0.00     0.00      0.00   0.00          0.00
##                      Harlem  0.00     0.00      0.06   0.00          0.00
##              Hell's Kitchen  0.00     0.00      0.05   0.00          0.00
##                  Highbridge  0.00     0.00      0.00   0.00          0.00
##                      Hollis  0.00     0.00      0.00   0.00          0.00
##                  Holliswood  0.00     0.00      0.00   0.00          0.00
##                Howard Beach  0.00     0.00      0.00   0.00          0.00
##                Howland Hook  0.00     0.00      0.00   0.00          0.00
##                    Huguenot  0.00     0.00      0.00   0.00          0.00
##                 Hunts Point  0.00     0.00      0.00   0.00          0.00
##                      Inwood  0.00     0.00      0.00   0.00          0.00
##             Jackson Heights  0.00     0.00      0.00   0.00          0.00
##                     Jamaica  0.00     0.00      0.00   0.01          0.00
##             Jamaica Estates  0.00     0.00      0.00   0.00          0.00
##               Jamaica Hills  0.00     0.00      0.00   0.00          0.00
##                  Kensington  0.00     0.00      0.00   0.00          0.00
##                 Kew Gardens  0.00     0.00      0.00   0.00          0.00
##           Kew Gardens Hills  0.00     0.00      0.00   0.00          0.00
##                 Kingsbridge  0.00     0.00      0.00   0.00          0.00
##                    Kips Bay  0.00     0.00      0.01   0.00          0.00
##                   Laurelton  0.00     0.00      0.00   0.00          0.00
##             Lighthouse Hill  0.00     0.00      0.00   0.00          0.00
##                Little Italy  0.00     0.00      0.00   0.00          0.00
##                 Little Neck  0.00     0.00      0.00   0.00          0.00
##            Long Island City  0.00     0.00      0.00   0.01          0.00
##                    Longwood  0.00     0.00      0.00   0.00          0.00
##             Lower East Side  0.00     0.00      0.02   0.00          0.00
##             Manhattan Beach  0.00     0.00      0.00   0.00          0.00
##                 Marble Hill  0.00     0.00      0.00   0.00          0.00
##             Mariners Harbor  0.00     0.00      0.00   0.00          0.00
##                     Maspeth  0.00     0.00      0.00   0.00          0.00
##                     Melrose  0.00     0.00      0.00   0.00          0.00
##              Middle Village  0.00     0.00      0.00   0.00          0.00
##               Midland Beach  0.00     0.00      0.00   0.00          0.00
##                     Midtown  0.00     0.00      0.03   0.00          0.00
##                     Midwood  0.00     0.00      0.00   0.00          0.00
##                  Mill Basin  0.00     0.00      0.00   0.00          0.00
##         Morningside Heights  0.00     0.00      0.00   0.00          0.00
##              Morris Heights  0.00     0.00      0.00   0.00          0.00
##                 Morris Park  0.00     0.00      0.00   0.00          0.00
##                  Morrisania  0.00     0.00      0.00   0.00          0.00
##                  Mott Haven  0.00     0.00      0.00   0.00          0.00
##                  Mount Eden  0.00     0.00      0.00   0.00          0.00
##                  Mount Hope  0.00     0.00      0.00   0.00          0.00
##                 Murray Hill  0.00     0.00      0.01   0.00          0.00
##                   Navy Yard  0.00     0.00      0.00   0.00          0.00
##                    Neponsit  0.00     0.00      0.00   0.00          0.00
##                New Brighton  0.00     0.00      0.00   0.00          0.00
##                    New Dorp  0.00     0.00      0.00   0.00          0.00
##              New Dorp Beach  0.00     0.00      0.00   0.00          0.00
##             New Springville  0.00     0.00      0.00   0.00          0.00
##                        NoHo  0.00     0.00      0.00   0.00          0.00
##                      Nolita  0.00     0.00      0.00   0.00          0.00
##             North Riverdale  0.00     0.00      0.00   0.00          0.00
##                     Norwood  0.00     0.00      0.00   0.00          0.00
##                     Oakwood  0.00     0.00      0.00   0.00          0.00
##                   Olinville  0.00     0.00      0.00   0.00          0.00
##                  Ozone Park  0.00     0.00      0.00   0.00          0.00
##                  Park Slope  0.00     0.01      0.00   0.00          0.00
##                 Parkchester  0.00     0.00      0.00   0.00          0.00
##                  Pelham Bay  0.00     0.00      0.00   0.00          0.00
##              Pelham Gardens  0.00     0.00      0.00   0.00          0.00
##                 Port Morris  0.00     0.00      0.00   0.00          0.00
##               Port Richmond  0.00     0.00      0.00   0.00          0.00
##                Prince's Bay  0.00     0.00      0.00   0.00          0.00
##            Prospect Heights  0.00     0.01      0.00   0.00          0.00
##   Prospect-Lefferts Gardens  0.00     0.01      0.00   0.00          0.00
##              Queens Village  0.00     0.00      0.00   0.00          0.00
##               Randall Manor  0.00     0.00      0.00   0.00          0.00
##                    Red Hook  0.00     0.00      0.00   0.00          0.00
##                   Rego Park  0.00     0.00      0.00   0.00          0.00
##               Richmond Hill  0.00     0.00      0.00   0.00          0.00
##                Richmondtown  0.00     0.00      0.00   0.00          0.00
##                   Ridgewood  0.00     0.00      0.00   0.01          0.00
##                   Riverdale  0.00     0.00      0.00   0.00          0.00
##              Rockaway Beach  0.00     0.00      0.00   0.00          0.00
##            Roosevelt Island  0.00     0.00      0.00   0.00          0.00
##                    Rosebank  0.00     0.00      0.00   0.00          0.00
##                    Rosedale  0.00     0.00      0.00   0.00          0.00
##                   Rossville  0.00     0.00      0.00   0.00          0.00
##               Schuylerville  0.00     0.00      0.00   0.00          0.00
##                    Sea Gate  0.00     0.00      0.00   0.00          0.00
##              Sheepshead Bay  0.00     0.00      0.00   0.00          0.00
##                 Shore Acres  0.00     0.00      0.00   0.00          0.00
##                 Silver Lake  0.00     0.00      0.00   0.00          0.00
##                        SoHo  0.00     0.00      0.01   0.00          0.00
##                   Soundview  0.00     0.00      0.00   0.00          0.00
##                 South Beach  0.00     0.00      0.00   0.00          0.00
##            South Ozone Park  0.00     0.00      0.00   0.00          0.00
##                 South Slope  0.00     0.01      0.00   0.00          0.00
##         Springfield Gardens  0.00     0.00      0.00   0.00          0.00
##              Spuyten Duyvil  0.00     0.00      0.00   0.00          0.00
##                  St. Albans  0.00     0.00      0.00   0.00          0.00
##                  St. George  0.00     0.00      0.00   0.00          0.00
##                   Stapleton  0.00     0.00      0.00   0.00          0.00
##             Stuyvesant Town  0.00     0.00      0.00   0.00          0.00
##                   Sunnyside  0.00     0.00      0.00   0.01          0.00
##                 Sunset Park  0.00     0.01      0.00   0.00          0.00
##            Theater District  0.00     0.00      0.01   0.00          0.00
##                 Throgs Neck  0.00     0.00      0.00   0.00          0.00
##                   Todt Hill  0.00     0.00      0.00   0.00          0.00
##               Tompkinsville  0.00     0.00      0.00   0.00          0.00
##                 Tottenville  0.00     0.00      0.00   0.00          0.00
##                     Tremont  0.00     0.00      0.00   0.00          0.00
##                     Tribeca  0.00     0.00      0.00   0.00          0.00
##                 Two Bridges  0.00     0.00      0.00   0.00          0.00
##                   Unionport  0.00     0.00      0.00   0.00          0.00
##          University Heights  0.00     0.00      0.00   0.00          0.00
##             Upper East Side  0.00     0.00      0.03   0.00          0.00
##             Upper West Side  0.00     0.00      0.03   0.00          0.00
##                    Van Nest  0.00     0.00      0.00   0.00          0.00
##                Vinegar Hill  0.00     0.00      0.00   0.00          0.00
##                   Wakefield  0.00     0.00      0.00   0.00          0.00
##          Washington Heights  0.00     0.00      0.02   0.00          0.00
##               West Brighton  0.00     0.00      0.00   0.00          0.00
##                  West Farms  0.00     0.00      0.00   0.00          0.00
##                West Village  0.00     0.00      0.01   0.00          0.00
##          Westchester Square  0.00     0.00      0.00   0.00          0.00
##                 Westerleigh  0.00     0.00      0.00   0.00          0.00
##                  Whitestone  0.00     0.00      0.00   0.00          0.00
##              Williamsbridge  0.00     0.00      0.00   0.00          0.00
##                Williamsburg  0.00     0.07      0.00   0.00          0.00
##                 Willowbrook  0.00     0.00      0.00   0.00          0.00
##             Windsor Terrace  0.00     0.00      0.00   0.00          0.00
##                   Woodhaven  0.00     0.00      0.00   0.00          0.00
##                    Woodlawn  0.00     0.00      0.00   0.00          0.00
##                     Woodrow  0.00     0.00      0.00   0.00          0.00
##                    Woodside  0.00     0.00      0.00   0.01          0.00
##                       Total  0.03     0.39      0.43   0.14          0.01
##  Total
##   0.00
##   0.00
##   0.00
##   0.00
##   0.02
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.08
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.05
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.02
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.03
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.01
##   0.02
##   0.00
##   0.01
##   0.03
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.02
##   0.01
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.02
##   0.01
##   0.00
##   0.06
##   0.05
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.02
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.03
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   0.01
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.03
##   0.03
##   0.00
##   0.00
##   0.00
##   0.02
##   0.00
##   0.00
##   0.01
##   0.00
##   0.00
##   0.00
##   0.00
##   0.07
##   0.00
##   0.00
##   0.00
##   0.00
##   0.00
##   0.01
##   1.00

Comments

  • There are so many neighbourhoods that the proportion for most individual neighbourhoods is less than 1%

Quantitative

# Correlation table of all numeric variables.
# list_history contains NAs (it is computed from reviews_per_month before the
# missing values in that column are replaced), so correlations are computed on
# pairwise-complete observations. With the default use = "everything", every
# correlation involving list_history is NA.
fp %>% 
  select_if(is.numeric) %>% # Use to select just the numeric variables
  cor(use = "pairwise.complete.obs") %>% 
  round(2) %>% 
  kable()
latitude longitude price minimum_nights number_of_reviews reviews_per_month calculated_host_listings_count availability_365 list_history min_spend
latitude 1.00 0.08 0.03 0.04 -0.01 -0.02 0.03 0.00 NA 0.01
longitude 0.08 1.00 -0.16 -0.08 0.03 0.12 -0.15 0.03 NA -0.05
price 0.03 -0.16 1.00 0.04 -0.07 -0.08 0.06 0.07 NA 0.46
minimum_nights 0.04 -0.08 0.04 1.00 -0.12 -0.18 0.12 0.13 NA 0.39
number_of_reviews -0.01 0.03 -0.07 -0.12 1.00 0.54 -0.12 0.01 NA -0.05
reviews_per_month -0.02 0.12 -0.08 -0.18 0.54 1.00 -0.11 -0.09 NA -0.07
calculated_host_listings_count 0.03 -0.15 0.06 0.12 -0.12 -0.11 1.00 0.19 NA 0.06
availability_365 0.00 0.03 0.07 0.13 0.01 -0.09 0.19 1.00 NA 0.06
list_history NA NA NA NA NA NA NA NA 1 NA
min_spend 0.01 -0.05 0.46 0.39 -0.05 -0.07 0.06 0.06 NA 1.00
# Plot the pairwise correlations computed by inspectdf
show_plot(inspect_cor(fp))

Comments

  • calculated_host_listings_count and availability_365 have some correlation
  • number_of_reviews and availability_365 have some correlation

Questions

Multi-variate graphical

Categorical

# Number of listings per borough, with one side-by-side bar per room type
grid.arrange(
  ggplot(data = fp, aes(x = neighbourhood_group, fill = room_type)) +
    geom_bar(position = "dodge"),
  ncol = 1
)

# Room type by borough as horizontal bars, stacked vertically in one column:
# absolute counts on top, within-borough proportions below.
grid.arrange(
  ggplot(data = fp, aes(x = neighbourhood_group, fill = room_type)) +
    geom_bar(position = "dodge") +
    coord_flip(),
  ggplot(data = fp, aes(x = neighbourhood_group, fill = room_type)) +
    geom_bar(position = "fill") +
    coord_flip(),
  ncol = 1
)

Comments

# Heat map of the number of listings in each borough / room-type combination
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(count = n()) %>%
  ggplot(mapping = aes(x = neighbourhood_group, y = room_type, fill = count)) +
  geom_tile()

Comments

Questions

Quantitative

# Scatter plot: price (x) against minimum_nights (y)
ggplot(data = fp, aes(x = price, y = minimum_nights)) +
  geom_point()

Comments

  • It seems they do not have any relationship
# Scatter plot: price (x) against number_of_reviews (y)
ggplot(data = fp, aes(x = price, y = number_of_reviews)) +
  geom_point()

Comments

  • It seems they do not have any relationship
# Scatter plot: price (x) against reviews_per_month (y)
ggplot(data = fp, aes(x = price, y = reviews_per_month)) +
  geom_point()

Comments

  • These two may be more strongly related than the total number of reviews and price
# Scatter plot: price (x) against availability_365 (y)
ggplot(data = fp, aes(x = price, y = availability_365)) +
  geom_point()

Comments

  • These two variables may be related
# Scatter plot: calculated_host_listings_count (x) against availability_365 (y)
ggplot(data = fp, aes(x = calculated_host_listings_count, y = availability_365)) +
  geom_point()

Comments

  • These two variables may be related
# Scatter plot: calculated_host_listings_count (x) against minimum_nights (y)
ggplot(data = fp, aes(x = calculated_host_listings_count, y = minimum_nights)) +
  geom_point()

Comments

  • These two variables may be related
# Scatter plot of listing coordinates: longitude (x) against latitude (y)
ggplot(data = fp, aes(x = longitude, y = latitude)) +
  geom_point()

Comments

  • The points clearly trace out the map of New York City
  • Some areas do not have any listings

Categorical and quantitative

# Pairwise plot matrix of the two main categorical variables and the key
# numeric variables.
ggpairs(
  select(
    fp,
    neighbourhood_group, room_type, price, minimum_nights,
    number_of_reviews, reviews_per_month,
    calculated_host_listings_count, availability_365
  )
)

# Correlation heat map from DataExplorer, called with maxcat = 5L
plot_correlation(fp, maxcat = 5L)

Comments

  • Looking more closely
    • neighbourhood_group / top neighbourhood of the group with other variables
# minimum_nights (x) vs price (y), coloured by borough, with one linear fit
# per borough
ggplot(data = fp, aes(x = minimum_nights, y = price, color = neighbourhood_group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# availability_365 (x) vs price (y), coloured by borough, with one linear fit
# per borough
ggplot(data = fp, aes(x = availability_365, y = price, color = neighbourhood_group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# reviews_per_month, price and neighbourhood_group
# NOTE: y was previously number_of_reviews, which contradicted this heading and
# the sibling plots (all of which put price on the y-axis); it now plots price.
fp %>% 
    ggplot(mapping = aes(x = reviews_per_month, y = price, color = neighbourhood_group)) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE)

# availability_365 (x) vs calculated_host_listings_count (y), coloured by
# borough, with one linear fit per borough
ggplot(
  data = fp,
  aes(x = availability_365, y = calculated_host_listings_count, color = neighbourhood_group)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

Comments

  • Almost all neighbourhood groups show a similar correlation pattern
  • In Manhattan, availability_365 and calculated_host_listings_count are more strongly correlated than in the other groups
# Boxplots of four numeric variables, one row per variable:
# left column split by borough, right column split by room type.
grid.arrange(
  # price
  ggplot(fp, aes(x = neighbourhood_group, y = price)) + geom_boxplot(),
  ggplot(fp, aes(x = room_type, y = price)) + geom_boxplot(),

  # minimum_nights
  ggplot(fp, aes(x = neighbourhood_group, y = minimum_nights)) + geom_boxplot(),
  ggplot(fp, aes(x = room_type, y = minimum_nights)) + geom_boxplot(),

  # number_of_reviews
  ggplot(fp, aes(x = neighbourhood_group, y = number_of_reviews)) + geom_boxplot(),
  ggplot(fp, aes(x = room_type, y = number_of_reviews)) + geom_boxplot(),

  # reviews_per_month
  ggplot(fp, aes(x = neighbourhood_group, y = reviews_per_month)) + geom_boxplot(),
  ggplot(fp, aes(x = room_type, y = reviews_per_month)) + geom_boxplot(),

  ncol = 2
)

Comments

# Heat maps of per-cell medians for each borough / room-type combination.

# Median price
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_price = median(price)) %>%
  ggplot(mapping = aes(x = neighbourhood_group, y = room_type, fill = med_price)) +
  geom_tile()

# Median minimum_nights
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_nights = median(minimum_nights)) %>%
  ggplot(mapping = aes(x = neighbourhood_group, y = room_type, fill = med_nights)) +
  geom_tile()

# Median number_of_reviews
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_reviews = median(number_of_reviews)) %>%
  ggplot(mapping = aes(x = neighbourhood_group, y = room_type, fill = med_reviews)) +
  geom_tile()

# Median reviews_per_month
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_month_reviews = median(reviews_per_month)) %>%
  ggplot(mapping = aes(x = neighbourhood_group, y = room_type, fill = med_month_reviews)) +
  geom_tile()

# Median calculated_host_listings_count
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_lists = median(calculated_host_listings_count)) %>%
  ggplot(mapping = aes(x = neighbourhood_group, y = room_type, fill = med_lists)) +
  geom_tile()

Comments

  • Entire homes in Manhattan have the highest median price, but one of the lowest median numbers of reviews.

Questions

  • What makes some listings extremely expensive?
  • More investigation on the map
  • More investigation on the top neighbourhood characteristics

Detailed EDA

What is the price distribution?

# Density histogram of listing prices on a log10 x-axis, with a dotted vertical
# line and a text label at the overall mean price.
ggplot(fp, aes(price)) +
  geom_histogram(bins = 30, aes(y = ..density..)) + 
  geom_density(alpha = 0.5) +
  geom_vline(xintercept = mean(fp$price), size = 2, linetype = 3) +
  annotate("text", x = 1800, y = 0.75,label = paste("Mean price = ", paste0(round(mean(fp$price), 2))),
           color =  "#32CD32", size = 8) +
  # Log-spaced breaks: the previous seq(0, 10000, 100) contained 0 (undefined on
  # a log scale) and about 100 linearly spaced ticks that overlap on a log axis.
  scale_x_log10(breaks = c(10, 30, 100, 300, 1000, 3000, 10000))

Comments

  • How about by the neighbourhood_groups?
# Mean price per neighbourhood_group, rounded to two decimals. It feeds the
# reference lines and text labels of the faceted histogram below.
neighbor_mean <- summarise(
  group_by(fp, neighbourhood_group),
  price = round(mean(price), 2)
)

# One density-scaled price histogram per neighbourhood_group (log10 x-axis),
# each with its own mean marked by a dotted line and a label.
fp %>%
  ggplot(mapping = aes(x = price)) +
  geom_histogram(mapping = aes(y = ..density..), bins = 30) +
  geom_density(alpha = 0.2) +
  ggtitle(
    "Transformed distribution of price\n by neighbourhood groups",
    subtitle = expression("With" ~'log'[10] ~ "transformation of x-axis")
  ) +
  geom_vline(
    data = neighbor_mean,
    mapping = aes(xintercept = price),
    size = 2,
    linetype = 3
  ) +
  geom_text(
    data = neighbor_mean,
    mapping = aes(x = price + 1400, label = paste("Mean  = ",price)),
    y = 1.5,
    color = "darkgreen",
    size = 4
  ) +
  facet_wrap(~neighbourhood_group) +
  scale_x_log10()

Comments

  • We can see that the price distribution of every neighbourhood_group is still skewed even after applying the log transformation
  • Manhattan has highest mean of 214.2
# Stacked bar chart of listings priced at or above the overall mean price,
# counted per neighbourhood_group and room_type; bars are ordered by
# reorder(neighbourhood_group, desc(n)).
fp %>%
  filter(price >= mean(price)) %>%
  group_by(neighbourhood_group, room_type) %>%
  tally() %>%
  ggplot(
    mapping = aes(
      x = reorder(neighbourhood_group, desc(n)),
      y = n,
      fill = room_type
    )
  ) +
  geom_col() +
  labs(
    x = NULL,
    y = NULL,
    title = "Number of above average price listings",
    subtitle = "Most of them are entire homes or apartments"
  )

Comments

  • Visually, I cannot see any Shared room listings priced above the average
  • Remember that, except for Manhattan, every neighbourhood_group has more private rooms
  • However, among the listings priced above average, entire homes make up the majority
  • Entire home can create

More interested in room_type

# Boxplots of price (log10 y-axis) for each room type. The dashed purple
# line marks the overall mean price across all listings.
ggplot(data = fp, mapping = aes(x = room_type, y = price)) +
  geom_boxplot(mapping = aes(fill = room_type)) +
  scale_y_log10() +
  labs(
    x = "Room type",
    y = "Price",
    title = "Boxplots of price by room type",
    subtitle = "Entire homes and apartments have the highest avg price"
  ) +
  geom_hline(yintercept = mean(fp$price), color = "purple", linetype = 2)

What does it look like with the medians by room_type and neighbourhood_group?

# Median price (and listing count) per neighbourhood_group and room type,
# shown as dodged bars.
med_price <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_price = median(price ), n = n())

ggplot(
  data = med_price,
  mapping = aes(x = neighbourhood_group, y = med_price, fill = paste(room_type))
) +
  geom_col(position = "dodge")

# Median of minimum_nights (and listing count) per neighbourhood_group and
# room type, shown as dodged bars.
# The statistic is median(), matching the section comment and the
# `med_night` name; the previous code computed mean() here.
med_nights <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_night = median(minimum_nights), n = n())
med_nights %>%
  ggplot(aes(x = neighbourhood_group, y = med_night, fill = paste(room_type))) +
  geom_bar(stat = "identity", position = "dodge")

# Median of minimum cost (price * minimum_nights) per neighbourhood_group
# and room type.
med_min_spend <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_cost = median(price * minimum_nights), n = n())

# Dodged bar chart, boroughs ordered by reorder(neighbourhood_group, -med_cost).
# The plot is kept in a variable so it can be both displayed and saved.
min_spend_plot <- med_min_spend %>%
  ggplot(aes(x = reorder(neighbourhood_group, -med_cost), y = med_cost, fill = paste(room_type))) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x ="", y="", title = "Entire Room listings of Manhattan's minimum spending is outstanding", subtitle = "About double as Brooklyn's entire home", caption = "From Technical Appendix") +
  theme_classic() +
  theme(legend.title = element_blank(),
        legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 900, 200), labels = scales::dollar) +
  scale_fill_discrete_qualitative(palette = "Cold")
min_spend_plot

# ggsave() is called on its own rather than added to the chain with `+`:
# it is not a plot component, and newer ggplot2 releases raise an error
# when it is added to a ggplot object.
ggsave(filename = "min_spend.png", plot = min_spend_plot)
## Saving 7 x 5 in image

Comments

  • As assumed, Manhattan’s Entire home/apt has the highest median
  • Manhattan’s Private room is cheaper than the Entire home in the other neighbourhood_groups
  • The Shared room median in Staten Island is surprisingly higher than its Private room median
  • It looks like going from Private room to Shared room does not decrease the median revenue much


Let’s see it on the map

# Hex-binned listing counts over longitude/latitude, one panel per
# room type, coloured from yellow (low count) to red (high count).
ggplot(data = fp, mapping = aes(x = longitude, y = latitude)) +
  geom_hex() +
  facet_wrap(~ room_type) +
  scale_fill_gradient(low = "yellow", high = "red", breaks = c(500, 1000)) +
  labs(x = "Longitude", y = "Latitude") +
  theme(legend.position = "bottom")

Comments

  • It is hard to even see the Shared room listings, whereas Entire home and Private room listings are located mostly in Manhattan and Brooklyn

Median price by neighbourhood

# Median price per neighbourhood, named directly in summarise(), then
# merged back onto the listings (merge() joins on the shared
# `neighbourhood` column).
neighbour_median <- fp %>%
  group_by(neighbourhood) %>%
  summarise(med_price_neighbour = median(price))
fp_subset <- merge(fp, neighbour_median)

# Neighbourhood of the Staten Island listing(s) priced at exactly 800
with(staten, neighbourhood[price == 800])
## [1] Fort Wadsworth
## 221 Levels: Allerton Arden Heights Arrochar Arverne Astoria ... Woodside
# Exclude Fort Wadsworth, the neighbourhood identified just above
# (presumably so its median does not dominate the colour scale -- confirm).
fp_subset <- subset(fp_subset, neighbourhood != "Fort Wadsworth")

# Google terrain base map with one small point per listing, coloured by
# the median price of the listing's neighbourhood.
save_p_1 <- ggmap(
  get_googlemap(
    center = c(lon = -73.95, lat = 40.72),
    zoom = 11,
    scale = 4,
    maptype = "terrain",
    color = "color"
  )
) +
  geom_point(
    data = fp_subset,
    mapping = aes(x = longitude, y = latitude, colour = med_price_neighbour),
    size = 0.1
  ) +
  scale_colour_gradientn(colours = rainbow(3)) +
  labs(
    x = "",
    y = "",
    title = "Lower Manhattan has the highest central tendency of price ",
    subtitle = "Surprisingly Upper East/West side and Brooklyn seem to be similar in price range",
    caption = "From Technical Appendix",
    colour = "Price Range in USD"
  ) +
  theme(axis.text = element_blank(), axis.ticks = element_blank())
## Source : https://maps.googleapis.com/maps/api/staticmap?center=40.72,-73.95&zoom=11&size=640x640&scale=4&maptype=terrain&key=xxx
# Display the neighbourhood-median price map built above.
save_p_1
## Warning: Removed 137 rows containing missing values (geom_point).

# Write the same plot object to geo.png (no explicit width/height given).
ggsave(filename = "geo.png", plot = save_p_1)
## Saving 7 x 5 in image
## Warning: Removed 137 rows containing missing values (geom_point).

Comments

  • Financial district to midtown has higher price range compared to the upper east/west side and northern area of manhattan
  • There is one blue spot (highest median price) in Staten Island
# Numeric "Blues" palette spanning the neighbourhood median prices.
# The domain is read from fp_subset: `med_price_neighbour` is created by
# the merge that builds fp_subset and does not exist on fp, so fp$... is
# NULL and triggers an "Unknown or uninitialised column" warning.
pal <- colorNumeric(
  palette = "Blues",
  domain = fp_subset$med_price_neighbour)
## Warning: Unknown or uninitialised column: 'med_price_neighbour'.
# Interactive map centred on (-73.95, 40.72) with clustered markers, one
# per listing. Hover label: the HTML-escaped listing name. Popup: price,
# room type, minimum stay, minimum cost and reviews per month.
leaflet(options = leafletOptions(minZoom = 0, maxZoom = 18)) %>%
  setView(lng = -73.95, lat = 40.72, zoom = 12) %>%
  addMarkers(
    data = fp,
    lng = ~ longitude,
    lat = ~ latitude,
    label = ~htmlEscape(name),
    popup = ~paste(
      "Price per night: $", price, "<br>",
      "Room Type:", room_type, "<br>",
      "Minimum Stay:", minimum_nights, "days<br>",
      "Minimum Cost for stay: $", minimum_nights * price, "<br>",
      "Monthly Average Number of Reviews: ", reviews_per_month
    ),
    clusterOptions = markerClusterOptions()
  ) %>%
  addTiles()

Comments

  • Useful for the dashboard

Stats EDA

Does Room_type impact the price?

# Two-sample t-test of price: Entire home/apt vs Private room listings.
# The outer parentheses print the result; it is also stored in `t`
# (a name shared with base R's transpose function t()).
(t <- t.test(fp$price[fp$room_type == 'Entire home/apt'], fp$price[fp$room_type == 'Private room' ], conf.level = 0.95))
## 
##  Welch Two Sample t-test
## 
## data:  fp$price[fp$room_type == "Entire home/apt"] and fp$price[fp$room_type == "Private room"]
## t = 47.672, df = 27227, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  125.2677 136.0103
## sample estimates:
## mean of x mean of y 
##   224.614    93.975
# Two-sample t-test of price: Entire home/apt vs Shared room listings.
# The result is printed and assigned to `t`, replacing any earlier value.
(t <- t.test(fp$price[fp$room_type == 'Entire home/apt'], fp$price[fp$room_type == 'Shared room' ], conf.level = 0.95))
## 
##  Welch Two Sample t-test
## 
## data:  fp$price[fp$room_type == "Entire home/apt"] and fp$price[fp$room_type == "Shared room"]
## t = 39.039, df = 1870.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  150.5066 166.4289
## sample estimates:
## mean of x mean of y 
## 224.61396  66.14617
# Two-sample t-test of price: Private room vs Shared room listings.
# The result is printed and assigned to `t`, replacing any earlier value.
(t <- t.test(fp$price[fp$room_type == 'Private room'], fp$price[fp$room_type == 'Shared room' ], conf.level = 0.95))
## 
##  Welch Two Sample t-test
## 
## data:  fp$price[fp$room_type == "Private room"] and fp$price[fp$room_type == "Shared room"]
## t = 7.6441, df = 1222.3, p-value = 4.238e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  20.68636 34.97129
## sample estimates:
## mean of x mean of y 
##  93.97500  66.14617

Comments

  • reject the null hypothesis that the room type does not impact the price
# Normal quantile for a two-sided 95 percent interval
z <- qnorm(0.975)

# Mean price per room type as bars, with error bars of +/- z * sd / sqrt(n)
fp %>%
  group_by(room_type) %>%
  summarise(
    mn = mean(price),
    sd = sd(price),
    n = n(),
    ci = z * sd / sqrt(n)
  ) %>%
  ggplot(mapping = aes(x = room_type, y = mn)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = mn - ci, ymax = mn + ci),
    position = position_dodge(0.9),
    width = 0.5
  ) +
  labs(title = "Price difference by Room type with error bar")

# Chi-squared test on the neighbourhood_group x room_type contingency table
# (cross-tabulated listing counts).
chisq.test(table(fp$neighbourhood_group, fp$room_type))
## 
##  Pearson's Chi-squared test
## 
## data:  table(fp$neighbourhood_group, fp$room_type)
## X-squared = 1169.7, df = 8, p-value < 2.2e-16

Comments

  • The X-squared statistic is large, so we reject the null hypothesis
  • The p-value is small, so we reject the null hypothesis
  • The test alone does not tell us which groups differ and which are similar

How do neighbourhood_group’s listing price vary with room_type

# Listing counts for every neighbourhood_group x room_type cell
C_P_n <- summarise(
  group_by(fp, neighbourhood_group, room_type),
  n = n()
)

# Simultaneous multinomial confidence intervals (alpha = 0.05) computed
# from the count column
C_P_n_ci <- multinomialCI(t(C_P_n[, "n"]), 0.05)

# Share of all listings in each cell, rounded to three decimals
C_P_tab <- summarise(
  group_by(fp, neighbourhood_group, room_type),
  prop = round(n() / nrow(fp), 3)
)

# Attach lower (column 1) and upper (column 2) bounds; rows line up because
# both tables come from the same grouping of fp
C_P_tab$ci_l <- round(C_P_n_ci[, 1], 3)
C_P_tab$ci_u <- round(C_P_n_ci[, 2], 3)

htmlTable(C_P_tab)
neighbourhood_group room_type prop ci_l ci_u
1 Bronx Entire home/apt 0.01 0.005 0.016
2 Bronx Private room 0.017 0.012 0.023
3 Bronx Shared room 0.002 0 0.007
4 Brooklyn Entire home/apt 0.194 0.188 0.199
5 Brooklyn Private room 0.187 0.182 0.193
6 Brooklyn Shared room 0.01 0.004 0.015
7 Manhattan Entire home/apt 0.269 0.264 0.275
8 Manhattan Private room 0.153 0.147 0.158
9 Manhattan Shared room 0.011 0.005 0.016
10 Queens Entire home/apt 0.049 0.044 0.055
11 Queens Private room 0.082 0.077 0.088
12 Queens Shared room 0.005 0 0.011
13 Staten Island Entire home/apt 0.005 0 0.01
14 Staten Island Private room 0.005 0 0.011
15 Staten Island Shared room 0 0 0.006
# Dodged bars of the listing share per room type and neighbourhood_group,
# with the multinomial confidence interval drawn as error bars
ggplot(
  data = C_P_tab,
  mapping = aes(x = room_type, y = prop, fill = neighbourhood_group)
) +
  geom_col(position = "dodge") +
  geom_text(
    mapping = aes(label = round(prop, 2)),
    position = position_dodge(0.9),
    vjust = -4, # lifts the labels above the error bars
    color = "black",
    size = 4
  ) +
  geom_errorbar(
    mapping = aes(ymin = ci_l, ymax = ci_u),
    position = position_dodge(0.9),
    width = 0.4
  )

Comments

  • Proportionally, Manhattan, Brooklyn and Queens are more reliable for every room_type

Significance of Correlation

# Pairwise correlations of all numeric columns of fp. rcorr() prints the
# correlation matrix, the pairwise n and the p-values.
rcorr(as.matrix(select_if(fp, is.numeric)))
##                                latitude longitude price minimum_nights
## latitude                           1.00      0.08  0.03           0.04
## longitude                          0.08      1.00 -0.16          -0.08
## price                              0.03     -0.16  1.00           0.04
## minimum_nights                     0.04     -0.08  0.04           1.00
## number_of_reviews                 -0.01      0.03 -0.07          -0.12
## reviews_per_month                 -0.02      0.12 -0.08          -0.18
## calculated_host_listings_count     0.03     -0.15  0.06           0.12
## availability_365                   0.00      0.03  0.07           0.13
## list_history                       0.01     -0.08  0.00           0.07
## min_spend                          0.01     -0.05  0.46           0.39
##                                number_of_reviews reviews_per_month
## latitude                                   -0.01             -0.02
## longitude                                   0.03              0.12
## price                                      -0.07             -0.08
## minimum_nights                             -0.12             -0.18
## number_of_reviews                           1.00              0.54
## reviews_per_month                           0.54              1.00
## calculated_host_listings_count             -0.12             -0.11
## availability_365                            0.01             -0.09
## list_history                                0.50             -0.20
## min_spend                                  -0.05             -0.07
##                                calculated_host_listings_count
## latitude                                                 0.03
## longitude                                               -0.15
## price                                                    0.06
## minimum_nights                                           0.12
## number_of_reviews                                       -0.12
## reviews_per_month                                       -0.11
## calculated_host_listings_count                           1.00
## availability_365                                         0.19
## list_history                                            -0.10
## min_spend                                                0.06
##                                availability_365 list_history min_spend
## latitude                                   0.00         0.01      0.01
## longitude                                  0.03        -0.08     -0.05
## price                                      0.07         0.00      0.46
## minimum_nights                             0.13         0.07      0.39
## number_of_reviews                          0.01         0.50     -0.05
## reviews_per_month                         -0.09        -0.20     -0.07
## calculated_host_listings_count             0.19        -0.10      0.06
## availability_365                           1.00         0.14      0.06
## list_history                               0.14         1.00      0.03
## min_spend                                  0.06         0.03      1.00
## 
## n
##                                latitude longitude price minimum_nights
## latitude                          31354     31354 31354          31354
## longitude                         31354     31354 31354          31354
## price                             31354     31354 31354          31354
## minimum_nights                    31354     31354 31354          31354
## number_of_reviews                 31354     31354 31354          31354
## reviews_per_month                 31354     31354 31354          31354
## calculated_host_listings_count    31354     31354 31354          31354
## availability_365                  31354     31354 31354          31354
## list_history                      26147     26147 26147          26147
## min_spend                         31354     31354 31354          31354
##                                number_of_reviews reviews_per_month
## latitude                                   31354             31354
## longitude                                  31354             31354
## price                                      31354             31354
## minimum_nights                             31354             31354
## number_of_reviews                          31354             31354
## reviews_per_month                          31354             31354
## calculated_host_listings_count             31354             31354
## availability_365                           31354             31354
## list_history                               26147             26147
## min_spend                                  31354             31354
##                                calculated_host_listings_count
## latitude                                                31354
## longitude                                               31354
## price                                                   31354
## minimum_nights                                          31354
## number_of_reviews                                       31354
## reviews_per_month                                       31354
## calculated_host_listings_count                          31354
## availability_365                                        31354
## list_history                                            26147
## min_spend                                               31354
##                                availability_365 list_history min_spend
## latitude                                  31354        26147     31354
## longitude                                 31354        26147     31354
## price                                     31354        26147     31354
## minimum_nights                            31354        26147     31354
## number_of_reviews                         31354        26147     31354
## reviews_per_month                         31354        26147     31354
## calculated_host_listings_count            31354        26147     31354
## availability_365                          31354        26147     31354
## list_history                              26147        26147     26147
## min_spend                                 31354        26147     31354
## 
## P
##                                latitude longitude price  minimum_nights
## latitude                                0.0000    0.0000 0.0000        
## longitude                      0.0000             0.0000 0.0000        
## price                          0.0000   0.0000           0.0000        
## minimum_nights                 0.0000   0.0000    0.0000               
## number_of_reviews              0.0172   0.0000    0.0000 0.0000        
## reviews_per_month              0.0014   0.0000    0.0000 0.0000        
## calculated_host_listings_count 0.0000   0.0000    0.0000 0.0000        
## availability_365               0.5140   0.0000    0.0000 0.0000        
## list_history                   0.3137   0.0000    0.6345 0.0000        
## min_spend                      0.0113   0.0000    0.0000 0.0000        
##                                number_of_reviews reviews_per_month
## latitude                       0.0172            0.0014           
## longitude                      0.0000            0.0000           
## price                          0.0000            0.0000           
## minimum_nights                 0.0000            0.0000           
## number_of_reviews                                0.0000           
## reviews_per_month              0.0000                             
## calculated_host_listings_count 0.0000            0.0000           
## availability_365               0.0815            0.0000           
## list_history                   0.0000            0.0000           
## min_spend                      0.0000            0.0000           
##                                calculated_host_listings_count
## latitude                       0.0000                        
## longitude                      0.0000                        
## price                          0.0000                        
## minimum_nights                 0.0000                        
## number_of_reviews              0.0000                        
## reviews_per_month              0.0000                        
## calculated_host_listings_count                               
## availability_365               0.0000                        
## list_history                   0.0000                        
## min_spend                      0.0000                        
##                                availability_365 list_history min_spend
## latitude                       0.5140           0.3137       0.0113   
## longitude                      0.0000           0.0000       0.0000   
## price                          0.0000           0.6345       0.0000   
## minimum_nights                 0.0000           0.0000       0.0000   
## number_of_reviews              0.0815           0.0000       0.0000   
## reviews_per_month              0.0000           0.0000       0.0000   
## calculated_host_listings_count 0.0000           0.0000       0.0000   
## availability_365                                0.0000       0.0000   
## list_history                   0.0000                        0.0000   
## min_spend                      0.0000           0.0000

Multi-linear regression

# Multiple linear regression of price, fitted as a Gaussian GLM with
# identity link so `mod` stays a glm object for summary(), confint() and
# the residual plots that follow.
# The previous call used factor(price) with family = binomial. With a
# factor response, binomial treats the first level as failure and all
# other levels as success, so that model only separated the lowest price
# from every other price and was not a regression on price.
# NOTE(review): price is heavily right-skewed; log(price) may be a better
# response -- confirm with the analysis owner.
mod <- glm(price ~ neighbourhood_group + room_type + minimum_nights + number_of_reviews + availability_365,
           family = gaussian(link = "identity"),
           data = fp)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Review output: print the summary of the fitted glm object `mod`
summary(mod)
## 
## Call:
## glm(formula = factor(price) ~ neighbourhood_group + room_type + 
##     minimum_nights + number_of_reviews + availability_365, family = binomial(link = "logit"), 
##     data = fp)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.1745   0.0155   0.0198   0.0255   0.0731  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      5.912e+00  1.176e+00   5.027 4.98e-07 ***
## neighbourhood_groupBrooklyn      1.653e+00  1.166e+00   1.418    0.156    
## neighbourhood_groupManhattan     2.148e+00  1.247e+00   1.722    0.085 .  
## neighbourhood_groupQueens        4.886e-01  1.156e+00   0.423    0.673    
## neighbourhood_groupStaten Island 1.570e+01  2.609e+03   0.006    0.995    
## room_typePrivate room            3.197e-01  6.877e-01   0.465    0.642    
## room_typeShared room             1.466e+01  1.568e+03   0.009    0.993    
## minimum_nights                   8.820e-03  3.522e-02   0.250    0.802    
## number_of_reviews                4.475e-03  8.954e-03   0.500    0.617    
## availability_365                 2.830e-03  2.904e-03   0.974    0.330    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 164.80  on 31353  degrees of freedom
## Residual deviance: 158.05  on 31344  degrees of freedom
## AIC: 178.05
## 
## Number of Fisher Scoring iterations: 21

Comments

  • Manhattan and Brooklyn have more impact on price

Residuals of the model

# Residual diagnostics: model residuals against the response and several
# listing attributes, one plot per figure.
par(mfrow = c(1, 1))

# Use the residuals() extractor (deviance residuals by default for glm)
# rather than mod$residuals, which holds the working residuals of the
# final IWLS iteration.
mod_resid <- residuals(mod)
resid_label <- "Deviance residuals"

# price
plot(fp$price, mod_resid, xlab = "price", ylab = resid_label)

# neighbourhood_group
plot(fp$neighbourhood_group, mod_resid,
     xlab = "neighbourhood_group", ylab = resid_label)

# neighbourhood (not a model term; checks for structure left unexplained)
plot(fp$neighbourhood, mod_resid, xlab = "neighbourhood", ylab = resid_label)

# room_type
plot(fp$room_type, mod_resid, xlab = "room_type", ylab = resid_label)

# minimum_nights
plot(fp$minimum_nights, mod_resid, xlab = "minimum_nights", ylab = resid_label)

# number_of_reviews
plot(fp$number_of_reviews, mod_resid,
     xlab = "number_of_reviews", ylab = resid_label)

Visualization of Multiple Regression

# Coefficient table of the fitted model (estimate, std. error, statistic,
# p-value)
coe <- coef(summary(mod))

# Bind the confint() bounds next to the coefficient stats; the intercept
# (row 1) is dropped from both pieces
coe_CI <- as.data.frame(
  cbind(
    coe[-1, ],
    confint(mod)[-1, ]
  )
)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Short column names for the coefficient / confidence-interval table
colnames(coe_CI) <- c("estimate", "se", "t", "pval", "low_CI", "high_CI")

# Show the table sorted by ascending p-value, rounded to three decimals
htmlTable(round(coe_CI[order(coe_CI$pval), ], 3))
estimate se t pval low_CI high_CI
neighbourhood_groupManhattan 2.148 1.247 1.722 0.085 -0.947 4.534
neighbourhood_groupBrooklyn 1.653 1.166 1.418 0.156 -1.366 3.733
availability_365 0.003 0.003 0.974 0.33 -0.003 0.009
number_of_reviews 0.004 0.009 0.5 0.617 -0.008 0.028
room_typePrivate room 0.32 0.688 0.465 0.642 -1.042 1.744
neighbourhood_groupQueens 0.489 1.156 0.423 0.673 -2.519 2.547
minimum_nights 0.009 0.035 0.25 0.802 -0.006 0.117
room_typeShared room 14.662 1567.654 0.009 0.993 -127.713
neighbourhood_groupStaten Island 15.696 2608.514 0.006 0.995 -221.21
# Point estimate of every coefficient; the y-axis is ordered with
# reorder(row.names(coe_CI), desc(pval)). The x-range is set from the
# smallest lower bound and the largest upper bound in coe_CI.
(g1 <- ggplot(
  data = coe_CI,
  mapping = aes(x = estimate, y = reorder(row.names(coe_CI), desc(pval)))
) +
  geom_point(size = 3) +
  xlim(min(coe_CI$low_CI), max(coe_CI$high_CI)) +
  labs(x = "Coefficient", y = "Variable") +
  theme_bw()
)

# Add a zero reference line and the confidence interval of each
# coefficient, drawn as two segments from the estimate to the upper and
# lower bound. Rows whose bound is NA lose only that segment.
# The assignment now wraps the whole chain: previously the parenthesis
# closed after geom_vline(), so g2 held only the reference line and the
# segments were printed but never stored.
(g2 <- g1 +
  geom_vline(xintercept = 0, color = "red") +
  geom_segment(aes(xend = high_CI,
                   yend = reorder(row.names(coe_CI), desc(pval))),
               color = "Blue") +
  geom_segment(aes(xend = low_CI,
                   yend = reorder(row.names(coe_CI), desc(pval))),
               color = "Blue") +
  xlab("Coefficient with Confidence Interval")
)
## Warning: Removed 2 rows containing missing values (geom_segment).
  • Shared room (room_type) and Staten Island (neighbourhood_group) have interesting confidence intervals
  • In the non-graphical output, we cannot see the upper bound of the confidence interval for those two

Summary

Comments

  • Manhattan has the highest price with Entire home/apartment
    • The minimum spending for Manhattan’s Entire home/apartment listings is much higher than in the other neighbourhood groups in New York
  • From the map, the Brooklyn area in particular is as high as upper Manhattan
  • Problems on the data
    • does not have the size of listing’s living area
    • more detail information may be required to get in order to do more investigation